xref: /plugin/aichat/helper.php (revision d02b793578c15c86b482725d129996df393f1890)
10337f47fSAndreas Gohr<?php
20337f47fSAndreas Gohr
33379af09SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
45e6dd16eSAndreas Gohruse dokuwiki\Extension\Plugin;
5e33a1d7aSAndreas Gohruse dokuwiki\plugin\aichat\AIChat;
6f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
70337f47fSAndreas Gohruse dokuwiki\plugin\aichat\Embeddings;
8294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\ChatInterface;
9294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\EmbeddingInterface;
106a18e0f4SAndreas Gohruse dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
1101f06932SAndreas Gohruse dokuwiki\plugin\aichat\Storage\AbstractStorage;
125e6dd16eSAndreas Gohruse dokuwiki\plugin\aichat\Storage\ChromaStorage;
1313dbfc23SAndreas Gohruse dokuwiki\plugin\aichat\Storage\PineconeStorage;
144c0099a8SAndreas Gohruse dokuwiki\plugin\aichat\Storage\QdrantStorage;
15f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Storage\SQLiteStorage;
160337f47fSAndreas Gohr
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null optional CLI plugin used for logging, see setLogger() */
    protected $logger;
    /** @var ChatInterface lazily created by getChatModel() */
    protected $chatModel;
    /** @var EmbeddingInterface lazily created by getEmbedModel() */
    protected $embedModel;
    /** @var Embeddings lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file where meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor
     *
     * Initializes the vendor autoloader, sets up the location of the run data file
     * and loads the plugin configuration.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        // the models below receive $this->conf, so it has to be populated up front
        $this->loadConfig();
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the Embeddings and Storage objects when they are created,
     * objects that already exist are not updated.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when no restriction has been
     * configured. Otherwise the current user has to match the configured 'restrict' list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;

        // a restriction is configured but nobody is logged in
        if (!isset($USERINFO)) return false;

        // the group list may be missing from the user info, treat that as "no groups"
        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $USERINFO['grps'] ?? []);
    }

    /**
     * Access the Chat Model
     *
     * The model class is resolved from the 'model' setting relative to the plugin's Model
     * namespace. The instance is created on first use and reused afterwards.
     *
     * @return ChatInterface
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');
        if (!class_exists($class)) {
            throw new \RuntimeException('Configured model not found: ' . $class);
        }

        // the model gets the complete plugin configuration
        $this->chatModel = new $class($this->conf);

        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * The instance is created on first use and reused afterwards.
     *
     * @return EmbeddingInterface
     */
    public function getEmbedModel()
    {
        // FIXME this is hardcoded to OpenAI for now
        if ($this->embedModel instanceof EmbeddingInterface) {
            return $this->embedModel;
        }

        $this->embedModel = new EmbeddingAda02($this->conf);

        return $this->embedModel;
    }

    /**
     * Access the Embeddings interface
     *
     * The instance is created on first use from the chat model, the embedding model and the
     * storage. A previously set logger is passed on to it.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings($this->getChatModel(), $this->getEmbedModel(), $this->getStorage());
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first configured setting in this order: Pinecone API key,
     * Chroma base URL, Qdrant base URL. Without any of them the SQLite storage is used.
     * A previously set logger is passed on to the storage.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        if ($this->getConf('pinecone_apikey')) {
            $this->storage = new PineconeStorage();
        } elseif ($this->getConf('chroma_baseurl')) {
            $this->storage = new ChromaStorage();
        } elseif ($this->getConf('qdrant_baseurl')) {
            $this->storage = new QdrantStorage();
        } else {
            $this->storage = new SQLiteStorage();
        }

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question and the
     * most recent exchange is passed along as context. Without one, the question is asked as is.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if (!$history) {
            return $this->askQuestion($question, []);
        }

        $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        $prev = end($history); // the most recent [user, ai] pair
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Chunks similar to the question are looked up and, when found, embedded as context into
     * the 'question' prompt. Without any similar chunks the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer') . ' ' . $this->getLanguagePrompt();
        }

        // message order: previous exchange (if any), then the system prompt, then the question
        $messages = [];
        if ($previous) {
            $messages[] = [
                'role' => 'user',
                'content' => $previous[0]
            ];
            $messages[] = [
                'role' => 'assistant',
                'content' => $previous[1]
            ];
        }
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];

        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // go back in history as far as possible without hitting the token limit
        // the limit is checked before a row is added, so the row crossing it is still included
        $chatHistory = '';
        $history = array_reverse($history);
        foreach ($history as $row) {
            if (
                count($this->getEmbeddings()->getTokenEncoder()->encode($chatHistory)) >
                $this->getChatModel()->getMaxRephrasingTokenLength()
            ) {
                break;
            }

            // older rows are prepended, so the transcript ends up in chronological order
            $chatHistory =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;
        }

        // ask the chat model to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getChatModel()->getAnswer($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the upper cased placeholder of its name, e.g. 'context'
     * replaces {{CONTEXT}}.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the template file can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $file = $this->localFN('prompt_' . $type);
        $template = file_get_contents($file);
        if ($template === false) {
            // never continue with an empty prompt, the model would answer without instructions
            throw new \RuntimeException('Failed to load prompt template: ' . $file);
        }

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' setting is above AIChat::LANG_AUTO_ALL and the name of the
     * wiki language is known, that language is enforced. Otherwise the model is told to follow
     * the user's language, falling back to the wiki language (or English if its name is unknown).
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        // null when the configured wiki language has no known name
        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $currentLang = $uiLanguage ?? 'English';
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file. Write failures are
     * not reported.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array if there is none or it can not be read
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // an empty or corrupted file decodes to null, callers always expect an array
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}
359