xref: /plugin/aichat/helper.php (revision bdf0ac5445aa1ff823b70743fb995bd12a7fe360)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\ChatInterface;
9use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10use dokuwiki\plugin\aichat\ModelFactory;
11use dokuwiki\plugin\aichat\Storage\AbstractStorage;
12
13/**
14 * DokuWiki Plugin aichat (Helper Component)
15 *
16 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
17 * @author  Andreas Gohr <gohr@cosmocode.de>
18 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory Factory used to access the chat, rephrase and embedding models */
    public $factory;

    /** @var CLIPlugin|null Optional CLI plugin used for logging, see setLogger() */
    protected $logger;

    /** @var Embeddings|null Lazily created by getEmbeddings() */
    protected $embeddings;

    /** @var AbstractStorage|null Lazily created by getStorage() */
    protected $storage;

    /** @var string Path of the JSON file that stores meta data on the last run */
    protected $runDataFile;


    /**
     * Constructor
     *
     * Initializes the vendor autoloader, sets the location of the run data file,
     * loads the plugin configuration and creates the model factory from it.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the Embeddings and Storage objects when they are created.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Update the configuration
     *
     * The given values are merged over the current plugin configuration and passed on to the
     * model factory. Already cached Embeddings and Storage instances are not reset here.
     *
     * @param array $config Configuration values to override
     * @return void
     */
    public function updateConfig(array $config)
    {
        $this->conf = array_merge($this->conf, $config);
        $this->factory->updateConfig($config);
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when no auth backend is available or when the 'restrict'
     * option is empty. Otherwise the current user has to match the 'restrict' member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            $USERINFO['grps'] ?? [] // user info without group data means no groups
        );
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the Model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The object is created on first access and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The object is created on first access and reused afterwards. The class to instantiate
     * is derived from the 'storage' config option.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given and rephrasing is enabled, the question is first rephrased into
     * a standalone question which is then used for the context search.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param string $sourcePage The page the question was asked on
     * @return array ['question' => $question, 'contextQuestion' => $contextQuestion,
     *                'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [], $sourcePage = '')
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $contextQuestion = $this->rephraseChatQuestion($question, $history);

            // Only use the rephrased question if it has more history than the chat history provides
            if ($this->getConf('rephraseHistory') > $this->getConf('chatHistory')) {
                $question = $contextQuestion;
            }
        } else {
            $contextQuestion = $question;
        }
        return $this->askQuestion($question, $history, $contextQuestion, $sourcePage);
    }

    /**
     * Ask a single standalone question
     *
     * @param string $question The question to ask
     * @param array $history [user, ai] of the previous question
     * @param string $contextQuestion The question to use for context search, defaults to $question
     * @param string $sourcePage The page the question was asked on
     * @return array ['question' => $question, 'contextQuestion' => $contextQuestion,
     *                'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [], $contextQuestion = '', $sourcePage = '')
    {
        if ($sourcePage) {
            // only the current page is context
            $similar = $this->getEmbeddings()->getPageChunks($sourcePage);
        } else {
            if ($this->getConf('fullpagecontext')) {
                // match chunks but use full pages as context
                $similar = $this->getEmbeddings()->getSimilarPages(
                    $contextQuestion ?: $question,
                    $this->getLanguageLimit(),
                    (int) $this->getConf('fullpagecontext')
                );
            } else {
                // use the chunks as context
                $similar = $this->getEmbeddings()->getSimilarChunks(
                    $contextQuestion ?: $question, $this->getLanguageLimit()
                );
            }
        }

        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'question' => $question,
                'customprompt' => $this->getConf('customprompt'),
            ]);
        } else {
            // nothing found: use the 'noanswer' prompt and drop the history
            $prompt = $this->getPrompt('noanswer', [
                'question' => $question,
            ]);
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(),
            $prompt,
            $history,
            (int) $this->getConf('chatHistory')
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'contextQuestion' => $contextQuestion,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase', [
            'question' => $question,
        ]);
        $messages = $this->prepareMessages(
            $this->getRephraseModel(),
            $prompt,
            $history,
            (int) $this->getConf('rephraseHistory')
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The history is added in front of the prompted question, limited by the given number of
     * history entries and by the tokens that remain after the question and a safety margin.
     *
     * @param ChatInterface $model The used model
     * @param string $promptedQuestion The user question embedded in a prompt
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of history entries to use
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string        $promptedQuestion,
        array         $history,
        int           $historySize
    ): array
    {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength(); // might be 0
        $remainingContext -= $this->countTokens($promptedQuestion);
        $safetyMargin = abs($remainingContext) * 0.05; // 5% safety margin

        // The result may be zero or negative, the token limit is ignored then.
        // The margin is a float, so cast explicitly: passing a fractional float to the int
        // parameter of historyMessages() is a deprecated implicit conversion since PHP 8.1.
        $remainingContext = (int) ($remainingContext - $safetyMargin);

        $messages = $this->historyMessages($history, $remainingContext, $historySize);
        $messages[] = [
            'role' => 'user',
            'content' => $promptedQuestion
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * The newest history entries are preferred. Only as many entries are used as fit into the
     * token limit. The returned messages are in chronological order.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use, zero or a negative limit disables this check
     * @param int $sizeLimit The maximum number of history entries ([user, ai] pairs) to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history from newest to oldest
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);

            if ($tokenLimit > 0 && $length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse order, the whole list is flipped back below
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces its upper cased {{NAME}} placeholder. The 'language' variable is
     * always set from getLanguagePrompt().
     *
     * @param string $type The name of the prompt template
     * @param string[] $vars Placeholder values, keyed by lower case placeholder name
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN($type, 'prompt'));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        // enforce the UI language, but only if its name is known
        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        // otherwise follow the user's language with the UI language (or English) as fallback
        $currentLang = $uiLanguage ?? 'English';
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array if there is none or it can not be decoded
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $data = json_decode((string) file_get_contents($this->runDataFile), true);

        // an unreadable, empty or corrupt file decodes to null, callers always expect an array
        return is_array($data) ? $data : [];
    }
}
432