xref: /plugin/aichat/helper.php (revision 2071dced6f96936ea7b9bf5dbe8a117eef598448)
10337f47fSAndreas Gohr<?php
20337f47fSAndreas Gohr
33379af09SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
45e6dd16eSAndreas Gohruse dokuwiki\Extension\Plugin;
5e33a1d7aSAndreas Gohruse dokuwiki\plugin\aichat\AIChat;
6f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
70337f47fSAndreas Gohruse dokuwiki\plugin\aichat\Embeddings;
8294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\ChatInterface;
9294a9eafSAndreas Gohruse dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10c2b7a1f7SAndreas Gohruse dokuwiki\plugin\aichat\ModelFactory;
1101f06932SAndreas Gohruse dokuwiki\plugin\aichat\Storage\AbstractStorage;
120337f47fSAndreas Gohr
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central entry point of the plugin: gives access to the configured models,
 * the embeddings and storage layers, and implements the question/answer flow
 * including history handling and prompt templating.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory Factory used to obtain the chat, rephrase and embedding models */
    public $factory;

    /** @var CLIPlugin|null Optional CLI plugin used for logging, see setLogger() */
    protected $logger;

    /** @var Embeddings|null Lazily created by getEmbeddings() */
    protected $embeddings;

    /** @var AbstractStorage|null Lazily created by getStorage() */
    protected $storage;

    /** @var string Path of the JSON file where meta data on the last run is stored */
    protected $runDataFile;


    /**
     * Constructor. Initializes vendor autoloader, the run data location,
     * the plugin configuration and the model factory
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the embeddings and storage objects when they are created.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is active or when the 'restrict' option is empty.
     * It is denied when a restriction is configured but no user info is available.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // user info without a group list must not trigger a warning or a type error
        $groups = (array)($USERINFO['grps'] ?? []);

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The instance is created on first use and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is selected by the 'storage' option. The instance is
     * created on first use and reused afterwards.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When there is a history and the 'rephraseHistory' option is greater than zero,
     * the question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and passed as context in the 'question' prompt.
     * When nothing similar is found, the 'noanswer' prompt is used and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'question' => $question,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer', [
                'question' => $question,
            ]);
            $history = [];
        }

        $chatModel = $this->getChatModel();
        $messages = $this->prepareMessages(
            $chatModel,
            $prompt,
            $history,
            (int)$this->getConf('chatHistory') // config values are not guaranteed to be integers
        );
        $answer = $chatModel->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase', [
            'question' => $question,
        ]);

        $rephraseModel = $this->getRephraseModel();
        $messages = $this->prepareMessages(
            $rephraseModel,
            $prompt,
            $history,
            (int)$this->getConf('rephraseHistory') // config values are not guaranteed to be integers
        );
        return $rephraseModel->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The history is trimmed to what fits into the model's input limit after the
     * prompted question and a 5% safety margin have been subtracted.
     *
     * @param ChatInterface $model The used model
     * @param string $promptedQuestion The user question embedded in a prompt
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $promptedQuestion,
        array $history,
        int $historySize
    ): array {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($promptedQuestion);
        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
        // truncate explicitly: passing a fractional float to an int parameter is deprecated in PHP 8.1+
        $remainingContext = (int)($remainingContext - $safetyMargin);
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext, $historySize);
        $messages[] = [
            'role' => 'user',
            'content' => $promptedQuestion
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The newest
     * entries are preferred; the result is returned in chronological order.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of history entries to use, negative values count as 0
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history from newest to oldest
        $history = array_reverse($history);
        // a negative length would make array_slice() cut from the end instead of limiting the size
        $history = array_slice($history, 0, max(0, $sizeLimit));
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse (answer before question), the final array_reverse() restores the order
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces its upper-cased {{NAME}} placeholder. The 'language'
     * variable is always set from getLanguagePrompt().
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the prompt template can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN($type, 'prompt'));
        if ($template === false) {
            // never continue with an empty prompt, the question would silently get lost
            throw new \RuntimeException('Failed to load prompt template: ' . $type);
        }
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When 'preferUIlanguage' is above AIChat::LANG_AUTO_ALL and the wiki language is
     * known, that language is enforced. Otherwise the user's language is requested,
     * with the wiki language (or English) as fallback.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $currentLang = $uiLanguage ?? 'English';
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, empty when there is no file or it does not hold a JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        // an unreadable, empty or corrupt file must not leak null/false to callers expecting an array
        $data = json_decode((string)file_get_contents($this->runDataFile), true);
        return is_array($data) ? $data : [];
    }
}
392