xref: /plugin/aichat/helper.php (revision 294a9eaf76b94a3f99dceca7f1750a7898de3dd9)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\ChatInterface;
9use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
11use dokuwiki\plugin\aichat\Storage\AbstractStorage;
12use dokuwiki\plugin\aichat\Storage\ChromaStorage;
13use dokuwiki\plugin\aichat\Storage\PineconeStorage;
14use dokuwiki\plugin\aichat\Storage\QdrantStorage;
15use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
16
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point of the plugin. It lazily creates and caches the chat model,
 * the embedding model, the vector storage and the Embeddings helper, and offers the
 * question/answer API that is built on top of them.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin $logger */
    protected $logger;
    /** @var ChatInterface */
    protected $chatModel;
    /** @var EmbeddingInterface */
    protected $embedModel;
    /** @var Embeddings */
    protected $embeddings;
    /** @var AbstractStorage */
    protected $storage;

    /** @var string path of the JSON file that stores meta data on the last run */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also derives the location of the run data file from the configured meta directory.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed to the Embeddings and Storage objects at the moment they are
     * created by this helper, so it needs to be set before they are first accessed.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is active or when the 'restrict' option is
     * empty. Otherwise the current user has to match the 'restrict' member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            // user info without a group list is treated as "no groups" instead of raising a warning
            (array)($USERINFO['grps'] ?? [])
        );
    }

    /**
     * Access the Chat Model
     *
     * The model class is looked up below the plugin's Model namespace using the 'model'
     * config option. The instance is created once and reused afterwards.
     *
     * @return ChatInterface
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

        if (!class_exists($class)) {
            throw new \RuntimeException('Configured model not found: ' . $class);
        }
        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
        $this->chatModel = new $class([
            'key' => $this->getConf('openaikey'),
            'org' => $this->getConf('openaiorg')
        ]);

        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * The instance is created once and reused afterwards.
     *
     * @return EmbeddingInterface
     */
    public function getEmbedModel()
    {
        // FIXME this is hardcoded to OpenAI for now
        if ($this->embedModel instanceof EmbeddingInterface) {
            return $this->embedModel;
        }

        $this->embedModel = new EmbeddingAda02([
            'key' => $this->getConf('openaikey'),
            'org' => $this->getConf('openaiorg')
        ]);

        return $this->embedModel;
    }

    /**
     * Access the Embeddings interface
     *
     * Built from the chat model, the embedding model and the storage of this helper.
     * The instance is created once and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings($this->getChatModel(), $this->getEmbedModel(), $this->getStorage());
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first configured option in this order:
     * 'pinecone_apikey', 'chroma_baseurl', 'qdrant_baseurl'. Without any of them the
     * SQLite storage is used. The instance is created once and reused afterwards.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        if ($this->getConf('pinecone_apikey')) {
            $this->storage = new PineconeStorage();
        } elseif ($this->getConf('chroma_baseurl')) {
            $this->storage = new ChromaStorage();
        } elseif ($this->getConf('qdrant_baseurl')) {
            $this->storage = new QdrantStorage();
        } else {
            $this->storage = new SQLiteStorage();
        }

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question and the
     * most recent exchange is passed along as the previous message pair. Without a
     * history, the question is asked as is.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
            $prev = end($history);
        } else {
            $standaloneQuestion = $question;
            $prev = [];
        }
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Looks up chunks similar to the question and passes them as context in the system
     * prompt. When no similar chunks are found, the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer') . ' ' . $this->getLanguagePrompt();
        }

        $messages = [
            [
                'role' => 'system',
                'content' => $prompt
            ],
            [
                'role' => 'user',
                'content' => $question
            ]
        ];

        if ($previous) {
            // array_unshift() prepends, so the resulting order is:
            // previous user message, previous assistant answer, system prompt, current question
            array_unshift($messages, [
                'role' => 'assistant',
                'content' => $previous[1]
            ]);
            array_unshift($messages, [
                'role' => 'user',
                'content' => $previous[0]
            ]);
        }

        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * The history is collected from the newest exchange backwards. An exchange is only
     * added when the collected history stays within the chat model's rephrasing token
     * limit. When no exchange fits (or the history is empty), there is nothing to
     * rephrase against and the question is returned unchanged without asking the model.
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // both are loop invariant, no need to fetch them for every history row
        $encoder = $this->getEmbeddings()->getTokenEncoder();
        $maxTokens = $this->getChatModel()->getMaxRephrasingTokenLength();

        // go back in history as far as possible without hitting the token limit
        $chatHistory = '';
        foreach (array_reverse($history) as $row) {
            $candidate =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;

            // check before committing the row, so the history never exceeds the limit
            if (count($encoder->encode($candidate)) > $maxTokens) {
                break;
            }

            $chatHistory = $candidate;
        }

        if ($chatHistory === '') {
            return $question;
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getChatModel()->getAnswer($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * The template is read from the localized file 'prompt_<type>'. Each variable is
     * inserted for its upper-cased placeholder, e.g. 'context' replaces '{{CONTEXT}}'.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the prompt template can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $file = $this->localFN('prompt_' . $type);
        $template = is_readable($file) ? file_get_contents($file) : false;
        if ($template === false) {
            // an unreadable template would otherwise silently result in an empty prompt
            throw new \RuntimeException('Prompt template could not be read: ' . $file);
        }

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is greater than AIChat::LANG_AUTO_ALL and the
     * name of the configured wiki language is known, that language is enforced.
     * Otherwise the model is told to use the user's language and to fall back to the
     * wiki language (or English when its name is unknown).
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $fallbackLang = $uiLanguage ?? 'English';

        // the trailing space keeps the two sentences apart
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $fallbackLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * The limit applies when the 'preferUIlanguage' option is at least AIChat::LANG_UI_LIMITED.
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array when there is none or it can not be decoded
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        // an unreadable, empty or corrupt file must not leak null/false to the caller
        $data = json_decode((string)file_get_contents($this->runDataFile), true);
        return is_array($data) ? $data : [];
    }
}
362