xref: /plugin/aichat/helper.php (revision 04afb84f6cb8a0c9b1d4d807e18f90fe739ec371)
10337f47fSAndreas Gohr<?php
20337f47fSAndreas Gohr
33379af09SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
45e6dd16eSAndreas Gohruse dokuwiki\Extension\Plugin;
5e33a1d7aSAndreas Gohruse dokuwiki\plugin\aichat\AIChat;
6f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
70337f47fSAndreas Gohruse dokuwiki\plugin\aichat\Embeddings;
8294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\ChatInterface;
9294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\EmbeddingInterface;
1034a1c478SAndreas Gohruse dokuwiki\plugin\aichat\Model\OpenAI\Embedding3Small;
1101f06932SAndreas Gohruse dokuwiki\plugin\aichat\Storage\AbstractStorage;
120337f47fSAndreas Gohr
130337f47fSAndreas Gohr/**
140337f47fSAndreas Gohr * DokuWiki Plugin aichat (Helper Component)
150337f47fSAndreas Gohr *
160337f47fSAndreas Gohr * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
170337f47fSAndreas Gohr * @author  Andreas Gohr <gohr@cosmocode.de>
180337f47fSAndreas Gohr */
197ebc7895Ssplitbrainclass helper_plugin_aichat extends Plugin
200337f47fSAndreas Gohr{
213379af09SAndreas Gohr    /** @var CLIPlugin $logger */
223379af09SAndreas Gohr    protected $logger;
23294a9eafSAndreas Gohr    /** @var ChatInterface */
246a18e0f4SAndreas Gohr    protected $chatModel;
25294a9eafSAndreas Gohr    /** @var EmbeddingInterface */
266a18e0f4SAndreas Gohr    protected $embedModel;
270337f47fSAndreas Gohr    /** @var Embeddings */
280337f47fSAndreas Gohr    protected $embeddings;
2901f06932SAndreas Gohr    /** @var AbstractStorage */
3001f06932SAndreas Gohr    protected $storage;
310337f47fSAndreas Gohr
    /** @var string path of the file in which meta data on the last run is stored */
33e75dc39fSAndreas Gohr    protected $runDataFile;
34e75dc39fSAndreas Gohr
350337f47fSAndreas Gohr    /**
36f8d5ae01SAndreas Gohr     * Constructor. Initializes vendor autoloader
37f8d5ae01SAndreas Gohr     */
38f8d5ae01SAndreas Gohr    public function __construct()
39f8d5ae01SAndreas Gohr    {
40e75dc39fSAndreas Gohr        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
41e75dc39fSAndreas Gohr        global $conf;
42e75dc39fSAndreas Gohr        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
43d02b7935SAndreas Gohr        $this->loadConfig();
44f8d5ae01SAndreas Gohr    }
45f8d5ae01SAndreas Gohr
46f8d5ae01SAndreas Gohr    /**
473379af09SAndreas Gohr     * Use the given CLI plugin for logging
483379af09SAndreas Gohr     *
493379af09SAndreas Gohr     * @param CLIPlugin $logger
503379af09SAndreas Gohr     * @return void
513379af09SAndreas Gohr     */
528285fff9SAndreas Gohr    public function setLogger($logger)
538285fff9SAndreas Gohr    {
543379af09SAndreas Gohr        $this->logger = $logger;
553379af09SAndreas Gohr    }
563379af09SAndreas Gohr
573379af09SAndreas Gohr    /**
58c4127b8eSAndreas Gohr     * Check if the current user is allowed to use the plugin (if it has been restricted)
59c4127b8eSAndreas Gohr     *
60c4127b8eSAndreas Gohr     * @return bool
61c4127b8eSAndreas Gohr     */
62c4127b8eSAndreas Gohr    public function userMayAccess()
63c4127b8eSAndreas Gohr    {
64c4127b8eSAndreas Gohr        global $auth;
65c4127b8eSAndreas Gohr        global $USERINFO;
66c4127b8eSAndreas Gohr        global $INPUT;
67c4127b8eSAndreas Gohr
68c4127b8eSAndreas Gohr        if (!$auth) return true;
69c4127b8eSAndreas Gohr        if (!$this->getConf('restrict')) return true;
70c4127b8eSAndreas Gohr        if (!isset($USERINFO)) return false;
71c4127b8eSAndreas Gohr
72c4127b8eSAndreas Gohr        return auth_isMember($this->getConf('restrict'), $INPUT->server->str('REMOTE_USER'), $USERINFO['grps']);
73c4127b8eSAndreas Gohr    }
74c4127b8eSAndreas Gohr
75c4127b8eSAndreas Gohr    /**
766a18e0f4SAndreas Gohr     * Access the Chat Model
770337f47fSAndreas Gohr     *
78294a9eafSAndreas Gohr     * @return ChatInterface
790337f47fSAndreas Gohr     */
806a18e0f4SAndreas Gohr    public function getChatModel()
810337f47fSAndreas Gohr    {
82294a9eafSAndreas Gohr        if ($this->chatModel instanceof ChatInterface) {
836a18e0f4SAndreas Gohr            return $this->chatModel;
846a18e0f4SAndreas Gohr        }
856a18e0f4SAndreas Gohr
86dce0dee5SAndreas Gohr        [$namespace, $name] = sexplode(' ', $this->getConf('chatmodel'), 2);
87dce0dee5SAndreas Gohr        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\ChatModel';
88d02b7935SAndreas Gohr
899f6b34c4SAndreas Gohr        if (!class_exists($class)) {
90dce0dee5SAndreas Gohr            throw new \RuntimeException('No ChatModel found for ' . $namespace);
919f6b34c4SAndreas Gohr        }
92d02b7935SAndreas Gohr
93dce0dee5SAndreas Gohr        $this->chatModel = new $class($name, $this->conf);
946a18e0f4SAndreas Gohr        return $this->chatModel;
959f6b34c4SAndreas Gohr    }
969f6b34c4SAndreas Gohr
976a18e0f4SAndreas Gohr    /**
986a18e0f4SAndreas Gohr     * Access the Embedding Model
996a18e0f4SAndreas Gohr     *
100294a9eafSAndreas Gohr     * @return EmbeddingInterface
1016a18e0f4SAndreas Gohr     */
1026a18e0f4SAndreas Gohr    public function getEmbedModel()
1036a18e0f4SAndreas Gohr    {
104294a9eafSAndreas Gohr        if ($this->embedModel instanceof EmbeddingInterface) {
1056a18e0f4SAndreas Gohr            return $this->embedModel;
1060337f47fSAndreas Gohr        }
1070337f47fSAndreas Gohr
108dce0dee5SAndreas Gohr        [$namespace, $name] = sexplode(' ', $this->getConf('embedmodel'), 2);
109dce0dee5SAndreas Gohr        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\EmbeddingModel';
1106a18e0f4SAndreas Gohr
111dce0dee5SAndreas Gohr        if (!class_exists($class)) {
112dce0dee5SAndreas Gohr            throw new \RuntimeException('No EmbeddingModel found for ' . $namespace);
113dce0dee5SAndreas Gohr        }
114dce0dee5SAndreas Gohr
115dce0dee5SAndreas Gohr        $this->embedModel = new $class($name, $this->conf);
1166a18e0f4SAndreas Gohr        return $this->embedModel;
1176a18e0f4SAndreas Gohr    }
1186a18e0f4SAndreas Gohr
1196a18e0f4SAndreas Gohr
1200337f47fSAndreas Gohr    /**
1210337f47fSAndreas Gohr     * Access the Embeddings interface
1220337f47fSAndreas Gohr     *
1230337f47fSAndreas Gohr     * @return Embeddings
1240337f47fSAndreas Gohr     */
1250337f47fSAndreas Gohr    public function getEmbeddings()
1260337f47fSAndreas Gohr    {
1276a18e0f4SAndreas Gohr        if ($this->embeddings instanceof Embeddings) {
1286a18e0f4SAndreas Gohr            return $this->embeddings;
1296a18e0f4SAndreas Gohr        }
1306a18e0f4SAndreas Gohr
13134a1c478SAndreas Gohr        $this->embeddings = new Embeddings(
13234a1c478SAndreas Gohr            $this->getChatModel(),
13334a1c478SAndreas Gohr            $this->getEmbedModel(),
13434a1c478SAndreas Gohr            $this->getStorage(),
13534a1c478SAndreas Gohr            $this->conf
13634a1c478SAndreas Gohr        );
1373379af09SAndreas Gohr        if ($this->logger) {
1383379af09SAndreas Gohr            $this->embeddings->setLogger($this->logger);
1393379af09SAndreas Gohr        }
1409f6b34c4SAndreas Gohr
1410337f47fSAndreas Gohr        return $this->embeddings;
1420337f47fSAndreas Gohr    }
1430337f47fSAndreas Gohr
1440337f47fSAndreas Gohr    /**
14501f06932SAndreas Gohr     * Access the Storage interface
14601f06932SAndreas Gohr     *
14701f06932SAndreas Gohr     * @return AbstractStorage
14801f06932SAndreas Gohr     */
14901f06932SAndreas Gohr    public function getStorage()
15001f06932SAndreas Gohr    {
1516a18e0f4SAndreas Gohr        if ($this->storage instanceof AbstractStorage) {
1526a18e0f4SAndreas Gohr            return $this->storage;
1536a18e0f4SAndreas Gohr        }
1546a18e0f4SAndreas Gohr
155*04afb84fSAndreas Gohr        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
156*04afb84fSAndreas Gohr        $this->storage = new $class($this->conf);
1578285fff9SAndreas Gohr
1583379af09SAndreas Gohr        if ($this->logger) {
1593379af09SAndreas Gohr            $this->storage->setLogger($this->logger);
1603379af09SAndreas Gohr        }
16101f06932SAndreas Gohr
16201f06932SAndreas Gohr        return $this->storage;
16301f06932SAndreas Gohr    }
16401f06932SAndreas Gohr
16501f06932SAndreas Gohr    /**
1660337f47fSAndreas Gohr     * Ask a question with a chat history
1670337f47fSAndreas Gohr     *
1680337f47fSAndreas Gohr     * @param string $question
1690337f47fSAndreas Gohr     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
1700337f47fSAndreas Gohr     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
1710337f47fSAndreas Gohr     * @throws Exception
1720337f47fSAndreas Gohr     */
1730337f47fSAndreas Gohr    public function askChatQuestion($question, $history = [])
1740337f47fSAndreas Gohr    {
1750337f47fSAndreas Gohr        if ($history) {
1760337f47fSAndreas Gohr            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
1770337f47fSAndreas Gohr        } else {
1780337f47fSAndreas Gohr            $standaloneQuestion = $question;
1790337f47fSAndreas Gohr        }
18034a1c478SAndreas Gohr        return $this->askQuestion($standaloneQuestion, $history);
1810337f47fSAndreas Gohr    }
1820337f47fSAndreas Gohr
1830337f47fSAndreas Gohr    /**
1840337f47fSAndreas Gohr     * Ask a single standalone question
1850337f47fSAndreas Gohr     *
1860337f47fSAndreas Gohr     * @param string $question
18734a1c478SAndreas Gohr     * @param array $history [user, ai] of the previous question
1880337f47fSAndreas Gohr     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
1890337f47fSAndreas Gohr     * @throws Exception
1900337f47fSAndreas Gohr     */
19134a1c478SAndreas Gohr    public function askQuestion($question, $history = [])
1920337f47fSAndreas Gohr    {
193e33a1d7aSAndreas Gohr        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
1949e81bea7SAndreas Gohr        if ($similar) {
195441edf84SAndreas Gohr            $context = implode(
196441edf84SAndreas Gohr                "\n",
197441edf84SAndreas Gohr                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
198441edf84SAndreas Gohr            );
199219268b1SAndreas Gohr            $prompt = $this->getPrompt('question', [
200219268b1SAndreas Gohr                'context' => $context,
201219268b1SAndreas Gohr            ]);
2029e81bea7SAndreas Gohr        } else {
20334a1c478SAndreas Gohr            $prompt = $this->getPrompt('noanswer');
20434a1c478SAndreas Gohr            $history = [];
2059e81bea7SAndreas Gohr        }
20668908844SAndreas Gohr
20734a1c478SAndreas Gohr        $messages = $this->prepareMessages($prompt, $question, $history);
2086a18e0f4SAndreas Gohr        $answer = $this->getChatModel()->getAnswer($messages);
2090337f47fSAndreas Gohr
2100337f47fSAndreas Gohr        return [
2110337f47fSAndreas Gohr            'question' => $question,
2120337f47fSAndreas Gohr            'answer' => $answer,
2130337f47fSAndreas Gohr            'sources' => $similar,
2140337f47fSAndreas Gohr        ];
2150337f47fSAndreas Gohr    }
2160337f47fSAndreas Gohr
2170337f47fSAndreas Gohr    /**
2180337f47fSAndreas Gohr     * Rephrase a question into a standalone question based on the chat history
2190337f47fSAndreas Gohr     *
2200337f47fSAndreas Gohr     * @param string $question The original user question
2210337f47fSAndreas Gohr     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
2220337f47fSAndreas Gohr     * @return string The rephrased question
2230337f47fSAndreas Gohr     * @throws Exception
2240337f47fSAndreas Gohr     */
2250337f47fSAndreas Gohr    public function rephraseChatQuestion($question, $history)
2260337f47fSAndreas Gohr    {
22734a1c478SAndreas Gohr        $prompt = $this->getPrompt('rephrase');
22834a1c478SAndreas Gohr        $messages = $this->prepareMessages($prompt, $question, $history);
22934a1c478SAndreas Gohr        return $this->getChatModel()->getAnswer($messages);
23034a1c478SAndreas Gohr    }
23134a1c478SAndreas Gohr
23234a1c478SAndreas Gohr    /**
23334a1c478SAndreas Gohr     * Prepare the messages for the AI
23434a1c478SAndreas Gohr     *
23534a1c478SAndreas Gohr     * @param string $prompt The fully prepared system prompt
23634a1c478SAndreas Gohr     * @param string $question The user question
23734a1c478SAndreas Gohr     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
23834a1c478SAndreas Gohr     * @return array An OpenAI compatible array of messages
23934a1c478SAndreas Gohr     */
24034a1c478SAndreas Gohr    protected function prepareMessages($prompt, $question, $history)
24134a1c478SAndreas Gohr    {
24234a1c478SAndreas Gohr        // calculate the space for context
24334a1c478SAndreas Gohr        $remainingContext = $this->getChatModel()->getMaxInputTokenLength();
24434a1c478SAndreas Gohr        $remainingContext -= $this->countTokens($prompt);
24534a1c478SAndreas Gohr        $remainingContext -= $this->countTokens($question);
24634a1c478SAndreas Gohr        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
24734a1c478SAndreas Gohr        $remainingContext -= $safetyMargin;
24834a1c478SAndreas Gohr        // FIXME we may want to also have an upper limit for the history and not always use the full context
24934a1c478SAndreas Gohr
25034a1c478SAndreas Gohr        $messages = $this->historyMessages($history, $remainingContext);
25134a1c478SAndreas Gohr        $messages[] = [
25234a1c478SAndreas Gohr            'role' => 'system',
25334a1c478SAndreas Gohr            'content' => $prompt
25434a1c478SAndreas Gohr        ];
25534a1c478SAndreas Gohr        $messages[] = [
25634a1c478SAndreas Gohr            'role' => 'user',
25734a1c478SAndreas Gohr            'content' => $question
25834a1c478SAndreas Gohr        ];
25934a1c478SAndreas Gohr        return $messages;
26034a1c478SAndreas Gohr    }
26134a1c478SAndreas Gohr
26234a1c478SAndreas Gohr    /**
26334a1c478SAndreas Gohr     * Create an array of OpenAI compatible messages from the given history
26434a1c478SAndreas Gohr     *
26534a1c478SAndreas Gohr     * Only as many messages are used as fit into the token limit
26634a1c478SAndreas Gohr     *
26734a1c478SAndreas Gohr     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
26834a1c478SAndreas Gohr     * @param int $tokenLimit
26934a1c478SAndreas Gohr     * @return array
27034a1c478SAndreas Gohr     */
27134a1c478SAndreas Gohr    protected function historyMessages($history, $tokenLimit)
27234a1c478SAndreas Gohr    {
27334a1c478SAndreas Gohr        $remainingContext = $tokenLimit;
27434a1c478SAndreas Gohr
27534a1c478SAndreas Gohr        $messages = [];
2760337f47fSAndreas Gohr        $history = array_reverse($history);
2770337f47fSAndreas Gohr        foreach ($history as $row) {
27834a1c478SAndreas Gohr            $length = $this->countTokens($row[0] . $row[1]);
27934a1c478SAndreas Gohr            if ($length > $remainingContext) {
2800337f47fSAndreas Gohr                break;
2810337f47fSAndreas Gohr            }
28234a1c478SAndreas Gohr            $remainingContext -= $length;
2830337f47fSAndreas Gohr
28434a1c478SAndreas Gohr            $messages[] = [
28534a1c478SAndreas Gohr                'role' => 'assistant',
28634a1c478SAndreas Gohr                'content' => $row[1]
28734a1c478SAndreas Gohr            ];
28834a1c478SAndreas Gohr            $messages[] = [
28934a1c478SAndreas Gohr                'role' => 'user',
29034a1c478SAndreas Gohr                'content' => $row[0]
29134a1c478SAndreas Gohr            ];
29234a1c478SAndreas Gohr        }
29334a1c478SAndreas Gohr        return array_reverse($messages);
2940337f47fSAndreas Gohr    }
2950337f47fSAndreas Gohr
29634a1c478SAndreas Gohr    /**
29734a1c478SAndreas Gohr     * Get an aproximation of the token count for the given text
29834a1c478SAndreas Gohr     *
29934a1c478SAndreas Gohr     * @param $text
30034a1c478SAndreas Gohr     * @return int
30134a1c478SAndreas Gohr     */
30234a1c478SAndreas Gohr    protected function countTokens($text)
30334a1c478SAndreas Gohr    {
30434a1c478SAndreas Gohr        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
3050337f47fSAndreas Gohr    }
3060337f47fSAndreas Gohr
3070337f47fSAndreas Gohr    /**
3080337f47fSAndreas Gohr     * Load the given prompt template and fill in the variables
3090337f47fSAndreas Gohr     *
3100337f47fSAndreas Gohr     * @param string $type
3110337f47fSAndreas Gohr     * @param string[] $vars
3120337f47fSAndreas Gohr     * @return string
3130337f47fSAndreas Gohr     */
3140337f47fSAndreas Gohr    protected function getPrompt($type, $vars = [])
3150337f47fSAndreas Gohr    {
3160337f47fSAndreas Gohr        $template = file_get_contents($this->localFN('prompt_' . $type));
31734a1c478SAndreas Gohr        $vars['language'] = $this->getLanguagePrompt();
3180337f47fSAndreas Gohr
3197ebc7895Ssplitbrain        $replace = [];
3200337f47fSAndreas Gohr        foreach ($vars as $key => $val) {
3210337f47fSAndreas Gohr            $replace['{{' . strtoupper($key) . '}}'] = $val;
3220337f47fSAndreas Gohr        }
3230337f47fSAndreas Gohr
3240337f47fSAndreas Gohr        return strtr($template, $replace);
3250337f47fSAndreas Gohr    }
326219268b1SAndreas Gohr
327219268b1SAndreas Gohr    /**
328219268b1SAndreas Gohr     * Construct the prompt to define the answer language
329219268b1SAndreas Gohr     *
330219268b1SAndreas Gohr     * @return string
331219268b1SAndreas Gohr     */
332219268b1SAndreas Gohr    protected function getLanguagePrompt()
333219268b1SAndreas Gohr    {
334219268b1SAndreas Gohr        global $conf;
335cfaf6b32SAndreas Gohr        $isoLangnames = include(__DIR__ . '/lang/languages.php');
336cfaf6b32SAndreas Gohr
337cfaf6b32SAndreas Gohr        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';
338219268b1SAndreas Gohr
339e33a1d7aSAndreas Gohr        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
340219268b1SAndreas Gohr            if (isset($isoLangnames[$conf['lang']])) {
341219268b1SAndreas Gohr                $languagePrompt = 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
342219268b1SAndreas Gohr                return $languagePrompt;
343219268b1SAndreas Gohr            }
344219268b1SAndreas Gohr        }
345219268b1SAndreas Gohr
346cfaf6b32SAndreas Gohr        $languagePrompt = 'Always answer in the user\'s language. ' .
347cfaf6b32SAndreas Gohr            "If you are unsure about the language, speak $currentLang.";
348219268b1SAndreas Gohr        return $languagePrompt;
349219268b1SAndreas Gohr    }
350e33a1d7aSAndreas Gohr
351e33a1d7aSAndreas Gohr    /**
352e33a1d7aSAndreas Gohr     * Should sources be limited to current language?
353e33a1d7aSAndreas Gohr     *
354e33a1d7aSAndreas Gohr     * @return string The current language code or empty string
355e33a1d7aSAndreas Gohr     */
356e33a1d7aSAndreas Gohr    public function getLanguageLimit()
357e33a1d7aSAndreas Gohr    {
358e33a1d7aSAndreas Gohr        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
359e33a1d7aSAndreas Gohr            global $conf;
360e33a1d7aSAndreas Gohr            return $conf['lang'];
361e33a1d7aSAndreas Gohr        } else {
362e33a1d7aSAndreas Gohr            return '';
363e33a1d7aSAndreas Gohr        }
364e33a1d7aSAndreas Gohr    }
365e75dc39fSAndreas Gohr
366e75dc39fSAndreas Gohr    /**
367e75dc39fSAndreas Gohr     * Store info about the last run
368e75dc39fSAndreas Gohr     *
369e75dc39fSAndreas Gohr     * @param array $data
370e75dc39fSAndreas Gohr     * @return void
371e75dc39fSAndreas Gohr     */
372e75dc39fSAndreas Gohr    public function setRunData(array $data)
373e75dc39fSAndreas Gohr    {
374e75dc39fSAndreas Gohr        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
375e75dc39fSAndreas Gohr    }
376e75dc39fSAndreas Gohr
377e75dc39fSAndreas Gohr    /**
378e75dc39fSAndreas Gohr     * Get info about the last run
379e75dc39fSAndreas Gohr     *
380e75dc39fSAndreas Gohr     * @return array
381e75dc39fSAndreas Gohr     */
382e75dc39fSAndreas Gohr    public function getRunData()
383e75dc39fSAndreas Gohr    {
384e75dc39fSAndreas Gohr        if (!file_exists($this->runDataFile)) {
385e75dc39fSAndreas Gohr            return [];
386e75dc39fSAndreas Gohr        }
387e75dc39fSAndreas Gohr        return json_decode(file_get_contents($this->runDataFile), true);
388e75dc39fSAndreas Gohr    }
3890337f47fSAndreas Gohr}
390