xref: /plugin/aichat/helper.php (revision 51aa8517a15244890eb0132c8019c9857c046a12)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\ChatInterface;
9use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10use dokuwiki\plugin\aichat\Storage\AbstractStorage;
11
12/**
13 * DokuWiki Plugin aichat (Helper Component)
14 *
15 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
16 * @author  Andreas Gohr <gohr@cosmocode.de>
17 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null $logger CLI plugin used for logging, set through setLogger() */
    protected $logger;
    /** @var ChatInterface|null chat model, created on first use by getChatModel() */
    protected $chatModel;
    /** @var ChatInterface|null rephrasing model, created on first use by getRephraseModel() */
    protected $rephraseModel;
    /** @var EmbeddingInterface|null embedding model, created on first use by getEmbedModel() */
    protected $embedModel;
    /** @var Embeddings|null embeddings interface, created on first use by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage|null storage backend, created on first use by getStorage() */
    protected $storage;

    /** @var string path of the JSON file holding meta data on the last run */
    protected $runDataFile;


    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also determines the location of the run data file inside the wiki's
     * meta directory and loads the plugin configuration.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        // the configuration array ($this->conf) is handed to models, storage and embeddings later on
        $this->loadConfig();
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is only handed to the storage and embeddings objects at the
     * moment they are created, so it needs to be set before they are first accessed.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when no auth backend is available or when the
     * 'restrict' option is empty. Otherwise the current user has to match the
     * configured member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            // fall back to an empty group list when the user info carries no groups
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the Chat Model
     *
     * The model is created on first access based on the 'chatmodel' option.
     *
     * @return ChatInterface
     * @throws \RuntimeException when no matching model class exists
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }

        $this->chatModel = $this->loadModel('chatmodel', 'ChatModel');
        return $this->chatModel;
    }

    /**
     * Access the model used to rephrase follow-up questions
     *
     * The model is created on first access based on the 'rephrasemodel' option.
     *
     * @return ChatInterface
     * @throws \RuntimeException when no matching model class exists
     */
    public function getRephraseModel()
    {
        if ($this->rephraseModel instanceof ChatInterface) {
            return $this->rephraseModel;
        }

        $this->rephraseModel = $this->loadModel('rephrasemodel', 'ChatModel');
        return $this->rephraseModel;
    }

    /**
     * Access the Embedding Model
     *
     * The model is created on first access based on the 'embedmodel' option.
     *
     * @return EmbeddingInterface
     * @throws \RuntimeException when no matching model class exists
     */
    public function getEmbedModel()
    {
        if ($this->embedModel instanceof EmbeddingInterface) {
            return $this->embedModel;
        }

        $this->embedModel = $this->loadModel('embedmodel', 'EmbeddingModel');
        return $this->embedModel;
    }

    /**
     * Instantiate a model from a "<namespace> <name>" configuration value
     *
     * The part before the first space selects the sub namespace below
     * \dokuwiki\plugin\aichat\Model, the remainder is passed to the model's
     * constructor together with the plugin configuration.
     *
     * @param string $setting The configuration key holding the model definition
     * @param string $classBase The class to load from the namespace (ChatModel or EmbeddingModel)
     * @return ChatInterface|EmbeddingInterface
     * @throws \RuntimeException when the class does not exist
     */
    protected function loadModel(string $setting, string $classBase)
    {
        [$namespace, $name] = sexplode(' ', $this->getConf($setting), 2);
        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\' . $classBase;

        if (!class_exists($class)) {
            throw new \RuntimeException('No ' . $classBase . ' found for ' . $namespace);
        }

        return new $class($name, $this->conf);
    }

    /**
     * Access the Embeddings interface
     *
     * Created on first access from the chat model, the embedding model, the
     * storage and the plugin configuration. A previously set logger is passed on.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbedModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is derived from the 'storage' option. A previously
     * set logger is passed on.
     *
     * @return AbstractStorage
     * @throws \RuntimeException when no matching storage class exists
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';

        // fail the same way the model getters do instead of dying on an unknown class
        if (!class_exists($class)) {
            throw new \RuntimeException('No Storage found for ' . $this->getConf('storage'));
        }

        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given and the 'rephraseHistory' option is greater than
     * zero, the question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up for the question and embedded into the
     * 'question' prompt as context. When nothing similar is found the
     * 'noanswer' prompt is used and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(), $prompt, $question, $history, $this->getConf('chatHistory')
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $messages = $this->prepareMessages(
            $this->getRephraseModel(), $prompt, $question, $history, $this->getConf('rephraseHistory')
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The resulting list consists of the history messages that fit, followed
     * by the system prompt and finally the user question.
     *
     * @param ChatInterface $model The used model
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model, string $prompt, string $question, array $history, int $historySize
    ): array
    {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);
        // 5% safety margin, kept integral because historyMessages() expects an int token limit
        $safetyMargin = (int) ceil($remainingContext * 0.05);
        $remainingContext -= $safetyMargin;
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, (int) $remainingContext, $historySize);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The most
     * recent exchanges are preferred, the result is in chronological order.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of messages to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history newest first so the most recent exchanges win
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added answer first: the final reversal turns this into user -> assistant
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * Counts the tokens produced by the token encoder of the Embeddings interface.
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the placeholder {{NAME}} (upper cased key). The
     * 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type The prompt type, used to locate the localized file 'prompt_<type>'
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the
     * wiki language is known, that language is enforced. Otherwise the model
     * is told to follow the user's language, with the wiki language (or
     * English when unknown) as fallback.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        // name of the wiki language, null when the language code is not listed
        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $currentLang = $uiLanguage ?? 'English';
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * The limit applies when the 'preferUIlanguage' option is at least
     * AIChat::LANG_UI_LIMITED.
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array when there is none or it can not be read
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // a damaged file decodes to null, callers still expect an array
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}
422