xref: /plugin/aichat/helper.php (revision 34a1c47875552330ce367360d99f2c3f9f69af94)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\ChatInterface;
9use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10use dokuwiki\plugin\aichat\Model\OpenAI\Embedding3Small;
11use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
12use dokuwiki\plugin\aichat\Storage\AbstractStorage;
13use dokuwiki\plugin\aichat\Storage\ChromaStorage;
14use dokuwiki\plugin\aichat\Storage\PineconeStorage;
15use dokuwiki\plugin\aichat\Storage\QdrantStorage;
16use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
17
18/**
19 * DokuWiki Plugin aichat (Helper Component)
20 *
21 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
22 * @author  Andreas Gohr <gohr@cosmocode.de>
23 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null optional CLI plugin used for logging, see setLogger() */
    protected $logger;
    /** @var ChatInterface|null lazily created by getChatModel() */
    protected $chatModel;
    /** @var EmbeddingInterface|null lazily created by getEmbedModel() */
    protected $embedModel;
    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file where meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also determines the run data file location and loads the plugin configuration.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        // presumably populates $this->conf, which is handed to the models and Embeddings below
        $this->loadConfig();
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is only passed on to Storage and Embeddings objects that are created
     * after this call; instances that were already created are not updated here.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when no auth backend is available or when the
     * 'restrict' option is empty.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        // a user record without group info must not be passed on as null
        $groups = $USERINFO['grps'] ?? [];

        return auth_isMember($this->getConf('restrict'), $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the Chat Model
     *
     * The model class is derived from the 'model' option and instantiated once.
     *
     * @return ChatInterface
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

        if (!class_exists($class)) {
            throw new \RuntimeException('Configured model not found: ' . $class);
        }

        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
        $this->chatModel = new $class($this->conf);

        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbedModel()
    {
        // FIXME this is hardcoded to OpenAI for now
        if ($this->embedModel instanceof EmbeddingInterface) {
            return $this->embedModel;
        }

        $this->embedModel = new EmbeddingAda02($this->conf);

        return $this->embedModel;
    }


    /**
     * Access the Embeddings interface
     *
     * Built once from the chat model, the embedding model and the storage. A logger
     * that was set before the first call is passed on.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbedModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The first configured backend wins, in this order: Pinecone, Chroma, Qdrant.
     * Without any of them configured, the SQLite storage is used.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        if ($this->getConf('pinecone_apikey')) {
            $this->storage = new PineconeStorage();
        } elseif ($this->getConf('chroma_baseurl')) {
            $this->storage = new ChromaStorage();
        } elseif ($this->getConf('qdrant_baseurl')) {
            $this->storage = new QdrantStorage();
        } else {
            $this->storage = new SQLiteStorage();
        }

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given, the question is first rephrased into a standalone
     * question. The 'question' in the result is that standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and passed to the model as context. When none are
     * found, the 'noanswer' prompt is used and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $messages = $this->prepareMessages($prompt, $question, $history);
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $messages = $this->prepareMessages($prompt, $question, $history);
        return $this->getChatModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The result contains as much history as fits, followed by the system prompt
     * and finally the user question.
     *
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages($prompt, $question, $history)
    {
        // calculate the space for context
        $remainingContext = $this->getChatModel()->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);
        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
        $remainingContext -= $safetyMargin;
        // token counts are integers, so rounding down keeps the budget an int without changing what fits
        $remainingContext = (int) floor($remainingContext);
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The most recent
     * exchanges are preferred; the result is in chronological order.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit
     * @return array
     */
    protected function historyMessages($history, $tokenLimit)
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk from newest to oldest so the most recent exchanges are kept
        $history = array_reverse($history);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added answer first, question second - the final array_reverse() restores the order
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Placeholders have the form {{NAME}} where NAME is the upper cased variable key.
     * The 'language' variable is always set by this method.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the prompt template can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $file = $this->localFN('prompt_' . $type);
        $template = is_readable($file) ? file_get_contents($file) : false;
        if ($template === false) {
            // do not silently continue with an empty system prompt
            throw new \RuntimeException('Failed to load prompt template: ' . $type);
        }
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the
     * interface language is known, that language is enforced. Otherwise the model
     * is asked to follow the user's language.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        // unknown interface languages fall back to English
        $currentLang = $uiLanguage ?? 'English';

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, empty when nothing usable has been stored
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        // an unreadable, empty or corrupt file must not leak false/null to callers expecting an array
        $data = json_decode((string) file_get_contents($this->runDataFile), true);
        return is_array($data) ? $data : [];
    }
}
401