xref: /plugin/aichat/helper.php (revision 6a18e0f40fd2d3238b0284483f1ee9aa53dad036)
10337f47fSAndreas Gohr<?php
20337f47fSAndreas Gohr
33379af09SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
45e6dd16eSAndreas Gohruse dokuwiki\Extension\Plugin;
5e33a1d7aSAndreas Gohruse dokuwiki\plugin\aichat\AIChat;
6f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
70337f47fSAndreas Gohruse dokuwiki\plugin\aichat\Embeddings;
8*6a18e0f4SAndreas Gohruse dokuwiki\plugin\aichat\Model\AbstractChatModel;
9*6a18e0f4SAndreas Gohruse dokuwiki\plugin\aichat\Model\AbstractEmbeddingModel;
10*6a18e0f4SAndreas Gohruse dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
1101f06932SAndreas Gohruse dokuwiki\plugin\aichat\Storage\AbstractStorage;
125e6dd16eSAndreas Gohruse dokuwiki\plugin\aichat\Storage\ChromaStorage;
1313dbfc23SAndreas Gohruse dokuwiki\plugin\aichat\Storage\PineconeStorage;
144c0099a8SAndreas Gohruse dokuwiki\plugin\aichat\Storage\QdrantStorage;
15f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Storage\SQLiteStorage;
160337f47fSAndreas Gohr
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point of the plugin. It lazily creates and caches the chat model, the
 * embedding model, the vector storage and the Embeddings service, and implements the
 * question answering flow (rephrase -> similarity search -> chat completion) on top of them.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null $logger optional CLI plugin used for logging */
    protected $logger;
    /** @var AbstractChatModel|null lazily created by getChatModel() */
    protected $chatModel;
    /** @var AbstractEmbeddingModel|null lazily created by getEmbedModel() */
    protected $embedModel;
    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file where meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also derives the location of the run data file from the wiki's meta directory.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is remembered for components created later and is also handed to the
     * storage and embeddings instances that have already been created.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
        if (!$logger) return;

        // components are created lazily; without this a logger set after their creation
        // would never reach them
        if ($this->storage instanceof AbstractStorage) {
            $this->storage->setLogger($logger);
        }
        if ($this->embeddings instanceof Embeddings) {
            $this->embeddings->setLogger($logger);
        }
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when no auth backend is available or when the 'restrict'
     * option is empty. Otherwise the current user has to match the configured member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // fall back to an empty group list when the user info carries no groups
        $groups = (array) ($USERINFO['grps'] ?? []);

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the Chat Model
     *
     * The class is derived from the 'model' option, relative to the plugin's Model namespace.
     * The instance is created once and reused afterwards.
     *
     * @return AbstractChatModel
     * @throws \RuntimeException when the configured class is missing or is not a chat model
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof AbstractChatModel) {
            return $this->chatModel;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

        if (!class_exists($class)) {
            throw new \RuntimeException('Configured model not found: ' . $class);
        }
        // any other existing class (e.g. an embedding model) would be instantiated, never be
        // cached by the instanceof check above and fail later when asked for an answer
        if (!is_subclass_of($class, AbstractChatModel::class)) {
            throw new \RuntimeException('Configured model is not a chat model: ' . $class);
        }

        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
        $this->chatModel = new $class([
            'key' => $this->getConf('openaikey'),
            'org' => $this->getConf('openaiorg')
        ]);

        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * The instance is created once and reused afterwards.
     *
     * @return AbstractEmbeddingModel
     */
    public function getEmbedModel()
    {
        // FIXME this is hardcoded to OpenAI for now
        if ($this->embedModel instanceof AbstractEmbeddingModel) {
            return $this->embedModel;
        }

        $this->embedModel = new EmbeddingAda02([
            'key' => $this->getConf('openaikey'),
            'org' => $this->getConf('openaiorg')
        ]);

        return $this->embedModel;
    }

    /**
     * Access the Embeddings interface
     *
     * Built from the chat model, the embedding model and the storage. The instance is
     * created once and reused afterwards; a known logger is passed on to it.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings($this->getChatModel(), $this->getEmbedModel(), $this->getStorage());
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first configured option in this order: Pinecone API key,
     * Chroma base URL, Qdrant base URL. Without any of them the SQLite storage is used.
     * The instance is created once and reused afterwards; a known logger is passed on to it.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        if ($this->getConf('pinecone_apikey')) {
            $this->storage = new PineconeStorage();
        } elseif ($this->getConf('chroma_baseurl')) {
            $this->storage = new ChromaStorage();
        } elseif ($this->getConf('qdrant_baseurl')) {
            $this->storage = new QdrantStorage();
        } else {
            $this->storage = new SQLiteStorage();
        }

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question and the
     * most recent exchange is passed along as conversational context.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
            $prev = end($history);
        } else {
            $standaloneQuestion = $question;
            $prev = [];
        }
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Looks up chunks similar to the question and hands them to the chat model as context.
     * When nothing similar is found the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer') . ' ' . $this->getLanguagePrompt();
        }

        $messages = [
            [
                'role' => 'system',
                'content' => $prompt
            ],
            [
                'role' => 'user',
                'content' => $question
            ]
        ];

        if ($previous) {
            // NOTE(review): the previous exchange ends up *before* the system prompt
            // (user, assistant, system, user) - order kept as is, confirm it is intended
            array_unshift($messages, [
                'role' => 'assistant',
                'content' => $previous[1]
            ]);
            array_unshift($messages, [
                'role' => 'user',
                'content' => $previous[0]
            ]);
        }

        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * The history is collected from the newest exchange backwards. An exchange is only
     * added while the collected text stays within the chat model's rephrasing token limit,
     * so an oversized most recent exchange results in an empty history.
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // go back in history as far as possible without hitting the token limit
        $chatHistory = '';
        foreach (array_reverse($history) as $row) {
            $candidate =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;

            // check the candidate *before* accepting it, otherwise the history could
            // overshoot the limit by a whole exchange
            if (
                count($this->getEmbeddings()->getTokenEncoder()->encode($candidate)) >
                $this->getChatModel()->getMaxRephrasingTokenLength()
            ) {
                break;
            }

            $chatHistory = $candidate;
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getChatModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * The template is read from the localized file 'prompt_<type>'. Each variable replaces
     * its upper cased placeholder, e.g. 'context' replaces {{CONTEXT}}.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the template file can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $file = $this->localFN('prompt_' . $type);
        $template = file_get_contents($file);
        if ($template === false) {
            // an unreadable template must not silently turn into an empty prompt
            throw new \RuntimeException('Prompt template could not be read: ' . $file);
        }

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the wiki
     * language has a known name, that language is enforced. Otherwise the model is told to
     * follow the user's language, with the wiki language (or English) as fallback.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $currentLang = $uiLanguage ?? 'English';

        // the trailing space keeps the two sentences apart
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON. Data that can not be encoded is not
     * written, so the info of the previous run is kept in that case.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        $json = json_encode($data, JSON_PRETTY_PRINT);
        if ($json === false) return; // do not replace the existing file with an empty one

        file_put_contents($this->runDataFile, $json);
    }

    /**
     * Get info about the last run
     *
     * @return array empty when no run data exists or the stored data can not be decoded
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $data = json_decode((string) file_get_contents($this->runDataFile), true);

        // json_decode() yields null for empty or broken files
        return is_array($data) ? $data : [];
    }
}
362