xref: /plugin/aichat/helper.php (revision c2b7a1f7fd0f6c6579c9ee46f0437ff89c2fc4b3)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\ChatInterface;
9use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10use dokuwiki\plugin\aichat\ModelFactory;
11use dokuwiki\plugin\aichat\Storage\AbstractStorage;
12
13/**
14 * DokuWiki Plugin aichat (Helper Component)
15 *
16 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
17 * @author  Andreas Gohr <gohr@cosmocode.de>
18 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory factory used to obtain the chat, rephrase and embedding models */
    public $factory;

    /** @var CLIPlugin|null $logger optional CLI plugin used for logging */
    protected $logger;

    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file holding meta data on the last run */
    protected $runDataFile;


    /**
     * Constructor. Initializes vendor autoloader, the plugin configuration and the model factory
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is only handed to the Embeddings and Storage objects at the moment they are
     * created by getEmbeddings()/getStorage(), so it has to be set before those are first accessed.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when no auth backend is available or when the 'restrict'
     * option is empty.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // the group list may be missing from the user info; auth_isMember() needs an array
        $groups = (array)($USERINFO['grps'] ?? []);

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used to rephrase questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The object is created on first use and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is selected by the 'storage' config option and created on first use.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given and the 'rephraseHistory' option is greater than zero, the
     * question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and passed as context to the 'question' prompt. When no
     * chunks are found the 'noanswer' prompt is used instead and the history is dropped.
     *
     * @param string $question
     * @param array $history [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $chatModel = $this->getChatModel();
        // config values are not guaranteed to be integers, prepareMessages() requires one
        $messages = $this->prepareMessages(
            $chatModel, $prompt, $question, $history, (int)$this->getConf('chatHistory')
        );
        $answer = $chatModel->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $rephraseModel = $this->getRephraseModel();
        // config values are not guaranteed to be integers, prepareMessages() requires one
        $messages = $this->prepareMessages(
            $rephraseModel, $prompt, $question, $history, (int)$this->getConf('rephraseHistory')
        );
        return $rephraseModel->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The returned list consists of the history messages that fit into the remaining token
     * budget, followed by the system prompt and finally the user question.
     *
     * @param ChatInterface $model The used model
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model, string $prompt, string $question, array $history, int $historySize
    ): array
    {
        // calculate the space for context
        $remainingContext = (int)$model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);

        // 5% safety margin, rounded up to whole tokens so the budget stays an integer
        // (a fractional float would be implicitly truncated when passed as int below)
        $safetyMargin = (int)ceil($remainingContext * 0.05);
        $remainingContext = max(0, $remainingContext - $safetyMargin);
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext, $historySize);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The newest history entries
     * are preferred; the result is returned in chronological order (user, assistant, ...).
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of messages to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        // a negative length would make array_slice() cut from the end instead of returning nothing
        if ($sizeLimit <= 0) {
            return [];
        }

        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history newest first, so the most recent exchanges win when space is short
        $history = array_slice(array_reverse($history), 0, $sizeLimit);
        foreach ($history as $row) {
            $userText = (string)($row[0] ?? '');
            $aiText = (string)($row[1] ?? '');

            $length = $this->countTokens($userText . $aiText);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // pushed in reverse (assistant, user) because the whole list is reversed at the end
            $messages[] = [
                'role' => 'assistant',
                'content' => $aiText
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $userText
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the placeholder {{NAME}} (upper cased key) in the template.
     * The 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the name of the
     * configured wiki language is known, that language is enforced. Otherwise the model is
     * told to follow the user's language, falling back to the wiki language (or English).
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL && $uiLanguage !== null) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $currentLang = $uiLanguage ?? 'English';
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array if nothing (or nothing readable) is stored
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $raw = file_get_contents($this->runDataFile);
        if ($raw === false) {
            return [];
        }

        // json_decode() yields null for empty/corrupt content; always honor the array contract
        $data = json_decode($raw, true);
        return is_array($data) ? $data : [];
    }
}
385