xref: /plugin/aichat/helper.php (revision c2b7a1f7fd0f6c6579c9ee46f0437ff89c2fc4b3)
10337f47fSAndreas Gohr<?php
20337f47fSAndreas Gohr
33379af09SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
45e6dd16eSAndreas Gohruse dokuwiki\Extension\Plugin;
5e33a1d7aSAndreas Gohruse dokuwiki\plugin\aichat\AIChat;
6f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
70337f47fSAndreas Gohruse dokuwiki\plugin\aichat\Embeddings;
8294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\ChatInterface;
9294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10*c2b7a1f7SAndreas Gohruse dokuwiki\plugin\aichat\ModelFactory;
1101f06932SAndreas Gohruse dokuwiki\plugin\aichat\Storage\AbstractStorage;
120337f47fSAndreas Gohr
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory Factory used to obtain the chat, rephrase and embedding models */
    public $factory;

    /** @var CLIPlugin|null Optional CLI plugin used for logging, set via setLogger() */
    protected $logger;

    /** @var Embeddings|null Created on first call of getEmbeddings() */
    protected $embeddings;

    /** @var AbstractStorage|null Created on first call of getStorage() */
    protected $storage;

    /** @var string Path of the JSON file in which meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also determines the run data file location, loads the plugin configuration
     * and creates the model factory from it.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the Embeddings and Storage objects when they are created.
     * Objects that have already been created are not updated.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict' option
     * is empty. Otherwise the current user has to match the configured restriction.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $restrict,
            $INPUT->server->str('REMOTE_USER'),
            // fall back to an empty group list if the user info carries no groups
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The object is created on first use and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is picked by the 'storage' configuration option and
     * the object is created on first use and reused afterwards.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given and the 'rephraseHistory' option is greater than zero,
     * the question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up for the question and passed as context to the chat model.
     * When no similar chunks are found, the 'noanswer' prompt is used and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $chatModel = $this->getChatModel();
        $messages = $this->prepareMessages(
            $chatModel,
            $prompt,
            $question,
            $history,
            (int) $this->getConf('chatHistory')
        );
        $answer = $chatModel->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $rephraseModel = $this->getRephraseModel();
        $messages = $this->prepareMessages(
            $rephraseModel,
            $prompt,
            $question,
            $history,
            (int) $this->getConf('rephraseHistory')
        );
        return $rephraseModel->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The resulting list contains the fitting history messages first, followed by the
     * system prompt and finally the user question.
     * NOTE(review): the system prompt is placed after the history, not at the start - confirm
     * this ordering is intended for all supported models.
     *
     * @param ChatInterface $model The used model
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $prompt,
        string $question,
        array $history,
        int $historySize
    ): array {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);
        // 5% safety margin; abs() makes sure the margin never enlarges an already negative budget
        $safetyMargin = abs($remainingContext) * 0.05;
        // the margin makes this a float, convert explicitly instead of relying on the
        // implicit (lossy) float to int conversion at the historyMessages() call
        $tokenLimit = (int) floor($remainingContext - $safetyMargin);
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $tokenLimit, $historySize);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The most recent
     * history rows are preferred, the result is returned in chronological order.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of messages to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        // a negative length would make array_slice() drop rows from the end instead
        if ($sizeLimit <= 0) {
            return [];
        }

        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history from the newest to the oldest row
        $recent = array_slice(array_reverse($history), 0, $sizeLimit);
        foreach ($recent as $row) {
            // skip malformed rows that do not carry both a user and an ai message
            if (!is_array($row) || !isset($row[0], $row[1])) {
                continue;
            }

            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse order (ai, user) because the whole list is reversed below
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the placeholder {{NAME}} (upper cased key) in the template.
     * The 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the wiki language
     * has a known name, answers are requested in that language. Otherwise the model is asked
     * to follow the user's language, falling back to the wiki language (or English).
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        // name of the wiki's language, null when it is not in the list
        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL && $uiLanguage !== null) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $currentLang = $uiLanguage ?? 'English';
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array if none is available or it can not be read
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode() gives null for broken JSON, callers are promised an array
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}
385