xref: /plugin/aichat/helper.php (revision dce0dee5ef27bcbbc5570fc278f3e75f426c19c5)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\ChatInterface;
9use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10use dokuwiki\plugin\aichat\Model\OpenAI\Embedding3Small;
11use dokuwiki\plugin\aichat\Storage\AbstractStorage;
12use dokuwiki\plugin\aichat\Storage\ChromaStorage;
13use dokuwiki\plugin\aichat\Storage\PineconeStorage;
14use dokuwiki\plugin\aichat\Storage\QdrantStorage;
15use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
16
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point for the plugin: lazily creates the configured chat model,
 * embedding model, vector storage and embeddings manager, builds the prompts and
 * message lists sent to the chat model, and persists meta data about the last run.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin $logger optional CLI plugin used for logging */
    protected $logger;
    /** @var ChatInterface lazily created, see getChatModel() */
    protected $chatModel;
    /** @var EmbeddingInterface lazily created, see getEmbedModel() */
    protected $embedModel;
    /** @var Embeddings lazily created, see getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage lazily created, see getStorage() */
    protected $storage;

    /** @var string path of the JSON file where meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also determines the location of the run data file and loads the plugin configuration.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the storage and embeddings objects when they are
     * created, so it needs to be set before those are first accessed.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when no auth backend is available or when the
     * 'restrict' option is empty. Otherwise the current user has to match the
     * configured member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        // a user info record without group data must not be passed on as null
        $groups = $USERINFO['grps'] ?? [];

        return auth_isMember($this->getConf('restrict'), $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the Chat Model
     *
     * The 'chatmodel' option is split at the first space into the model namespace
     * and the model name. The object is created once and reused afterwards.
     *
     * @return ChatInterface
     * @throws \RuntimeException when no ChatModel class exists for the configured namespace
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }

        [$namespace, $name] = sexplode(' ', $this->getConf('chatmodel'), 2);
        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\ChatModel';

        if (!class_exists($class)) {
            throw new \RuntimeException('No ChatModel found for ' . $namespace);
        }

        $this->chatModel = new $class($name, $this->conf);
        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * The 'embedmodel' option is split at the first space into the model namespace
     * and the model name. The object is created once and reused afterwards.
     *
     * @return EmbeddingInterface
     * @throws \RuntimeException when no EmbeddingModel class exists for the configured namespace
     */
    public function getEmbedModel()
    {
        if ($this->embedModel instanceof EmbeddingInterface) {
            return $this->embedModel;
        }

        [$namespace, $name] = sexplode(' ', $this->getConf('embedmodel'), 2);
        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\EmbeddingModel';

        if (!class_exists($class)) {
            throw new \RuntimeException('No EmbeddingModel found for ' . $namespace);
        }

        $this->embedModel = new $class($name, $this->conf);
        return $this->embedModel;
    }


    /**
     * Access the Embeddings interface
     *
     * Created once from the chat model, the embedding model, the storage and the
     * plugin configuration. A previously set logger is passed on.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbedModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first configuration option that is set:
     * 'pinecone_apikey', then 'chroma_baseurl', then 'qdrant_baseurl'. When none of
     * them is set, the SQLite storage is used. A previously set logger is passed on.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        if ($this->getConf('pinecone_apikey')) {
            $this->storage = new PineconeStorage();
        } elseif ($this->getConf('chroma_baseurl')) {
            $this->storage = new ChromaStorage();
        } elseif ($this->getConf('qdrant_baseurl')) {
            $this->storage = new QdrantStorage();
        } else {
            $this->storage = new SQLiteStorage();
        }

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given, the question is first rephrased into a standalone
     * question. The returned 'question' is that standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Chunks similar to the question are looked up and embedded as context into the
     * 'question' prompt. When no similar chunks are found, the 'noanswer' prompt is
     * used instead and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $messages = $this->prepareMessages($prompt, $question, $history);
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $messages = $this->prepareMessages($prompt, $question, $history);
        return $this->getChatModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The result contains as much of the history as fits into the chat model's
     * input limit, followed by the system prompt and finally the user question.
     *
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages($prompt, $question, $history)
    {
        // calculate the space for context
        $remainingContext = $this->getChatModel()->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);
        $remainingContext -= $remainingContext * 0.05; // 5% safety margin
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        // the margin calculation yields a float, the token limit is a whole number
        $tokenLimit = (int) floor($remainingContext);

        $messages = $this->historyMessages($history, $tokenLimit);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The history is
     * walked from the newest exchange to the oldest and stops at the first exchange
     * that no longer fits. The result is returned in chronological order.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit
     * @return array
     */
    protected function historyMessages($history, $tokenLimit)
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        $history = array_reverse($history);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse (answer before question) because the whole list is flipped below
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Variables are referenced in the template as upper case placeholders in double
     * curly braces, e.g. 'context' replaces {{CONTEXT}}. The 'language' variable is
     * always set by this method and overrides any passed value of the same name.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the prompt template can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));
        if ($template === false) {
            // do not silently continue with an empty prompt
            throw new \RuntimeException('Failed to load prompt template ' . $type);
        }
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the name
     * of the configured wiki language is known, answers are forced to that language.
     * Otherwise the model is told to use the user's language and to fall back to the
     * wiki language (or English when its name is unknown).
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $wikiLang = $isoLangnames[$conf['lang']] ?? null;

        if ($wikiLang !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $wikiLang . '.';
        }

        $currentLang = $wikiLang ?? 'English';
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * The limit applies when the 'preferUIlanguage' option is at least
     * AIChat::LANG_UI_LIMITED.
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file. Data that
     * can not be encoded is not written, so existing run data stays intact.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        $json = json_encode($data, JSON_PRETTY_PRINT);
        if ($json === false) {
            return;
        }
        file_put_contents($this->runDataFile, $json);
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array when there is none or it can not be read or decoded
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode returns null for invalid input and may return scalars
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}
401