xref: /plugin/aichat/helper.php (revision 59a2a267f902aeba84cd501884b8086bd6c4af68)
10337f47fSAndreas Gohr<?php
20337f47fSAndreas Gohr
33379af09SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
45e6dd16eSAndreas Gohruse dokuwiki\Extension\Plugin;
5e33a1d7aSAndreas Gohruse dokuwiki\plugin\aichat\AIChat;
6f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
70337f47fSAndreas Gohruse dokuwiki\plugin\aichat\Embeddings;
8294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\ChatInterface;
9294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10c2b7a1f7SAndreas Gohruse dokuwiki\plugin\aichat\ModelFactory;
1101f06932SAndreas Gohruse dokuwiki\plugin\aichat\Storage\AbstractStorage;
120337f47fSAndreas Gohr
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Gives access to the configured models, the embeddings and the storage backend,
 * builds the prompts sent to the chat model and keeps meta data about the last run.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory Provides the chat, rephrase and embedding models */
    public $factory;

    /** @var CLIPlugin|null Optional CLI plugin used for logging, see setLogger() */
    protected $logger;

    /** @var Embeddings|null Created on first use by getEmbeddings() */
    protected $embeddings;

    /** @var AbstractStorage|null Created on first use by getStorage() */
    protected $storage;

    /** @var string Path of the JSON file in which meta data on the last run is stored */
    protected $runDataFile;


    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also determines the location of the run data file, loads the plugin
     * configuration and creates the model factory from it.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the embeddings and storage objects when they
     * are created, so it has to be set before these are first requested.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when no auth backend is available or when the
     * 'restrict' option is empty.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // $USERINFO may be set without any group data; fall back to "no groups"
        // instead of passing an undefined index on to the membership check
        $groups = (array)($USERINFO['grps'] ?? []);

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The object is created on first use and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if (!($this->embeddings instanceof Embeddings)) {
            $this->embeddings = new Embeddings(
                $this->getChatModel(),
                $this->getEmbeddingModel(),
                $this->getStorage(),
                $this->conf
            );
            if ($this->logger) {
                $this->embeddings->setLogger($this->logger);
            }
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is derived from the 'storage' option. The object is
     * created on first use and reused afterwards.
     *
     * @return AbstractStorage
     * @throws RuntimeException when no storage class exists for the configured name
     */
    public function getStorage()
    {
        if (!($this->storage instanceof AbstractStorage)) {
            $name = $this->getConf('storage');
            $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $name . 'Storage';

            // fail with a readable message instead of a "class not found" error
            if (!class_exists($class)) {
                throw new \RuntimeException('Unknown storage backend: ' . $name);
            }

            $this->storage = new $class($this->conf);
            if ($this->logger) {
                $this->storage->setLogger($this->logger);
            }
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When there is a history and the 'rephraseHistory' option is above zero, the
     * question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        $standaloneQuestion = $question;
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        }

        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Chunks similar to the question are looked up and passed to the chat model as
     * context. When no chunks are found, the 'noanswer' prompt is used and the
     * history is not sent along.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());

        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'question' => $question,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer', [
                'question' => $question,
            ]);
            $history = [];
        }

        $model = $this->getChatModel();
        $messages = $this->prepareMessages($model, $prompt, $history, (int)$this->getConf('chatHistory'));

        return [
            'question' => $question,
            'answer' => $model->getAnswer($messages),
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase', [
            'question' => $question,
        ]);

        $model = $this->getRephraseModel();
        $messages = $this->prepareMessages($model, $prompt, $history, (int)$this->getConf('rephraseHistory'));

        return $model->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The history is placed in front of the prompted question, limited by the
     * model's maximum input length and the given number of history entries.
     *
     * @param ChatInterface $model The used model
     * @param string $promptedQuestion The user question embedded in a prompt
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of history entries to use
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $promptedQuestion,
        array $history,
        int $historySize
    ): array {
        // calculate the space that is left for the history
        $remainingContext = $model->getMaxInputTokenLength() - $this->countTokens($promptedQuestion);
        $remainingContext -= $remainingContext * 0.05; // 5% safety margin
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        // the margin turned the value into a float; cast explicitly because passing a
        // fractional float to an int parameter is deprecated as of PHP 8.1
        $tokenLimit = (int)$remainingContext;

        $messages = $this->historyMessages($history, $tokenLimit, $historySize);
        $messages[] = [
            'role' => 'user',
            'content' => $promptedQuestion,
        ];

        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The newest
     * entries are considered first, the result is returned oldest first.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of history entries to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;
        $messages = [];

        // newest entries first, so they are the ones kept when space runs out
        $recent = array_slice(array_reverse($history), 0, $sizeLimit);
        foreach ($recent as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse (answer before question) because the whole list is flipped below
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1],
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0],
            ];
        }

        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the placeholder {{NAME}} (upper cased key). The
     * 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws RuntimeException when the prompt template can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN($type, 'prompt'));
        if ($template === false) {
            // do not continue with an empty prompt
            throw new \RuntimeException('Failed to load prompt template: ' . $type);
        }

        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the wiki
     * language is listed in lang/languages.php, answers are requested in that
     * language. Otherwise the user's language is requested, with the wiki language
     * (or English, if it is not listed) as fallback.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $currentLang = $uiLanguage ?? 'English';

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array if there is none or it can not be decoded
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        // an unreadable, empty or corrupt file must not break the array return contract
        $data = json_decode((string)file_get_contents($this->runDataFile), true);

        return is_array($data) ? $data : [];
    }
}
384