xref: /plugin/aichat/helper.php (revision d02b793578c15c86b482725d129996df393f1890)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\ChatInterface;
9use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
10use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
11use dokuwiki\plugin\aichat\Storage\AbstractStorage;
12use dokuwiki\plugin\aichat\Storage\ChromaStorage;
13use dokuwiki\plugin\aichat\Storage\PineconeStorage;
14use dokuwiki\plugin\aichat\Storage\QdrantStorage;
15use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
16
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point for the plugin: lazily creates the chat model, the embedding model,
 * the vector storage and the Embeddings service, builds the prompts and asks the questions.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null optional logger, see setLogger() */
    protected $logger;
    /** @var ChatInterface|null lazily created by getChatModel() */
    protected $chatModel;
    /** @var EmbeddingInterface|null lazily created by getEmbedModel() */
    protected $embedModel;
    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file where meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader, the run data file location and the plugin config
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is only handed to Embeddings and Storage objects that are created after this
     * call; instances that were already created by this helper are not updated.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict' option is empty.
     * Otherwise the current user has to match the 'restrict' member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // a user record without group info is treated as "member of no group"
        $groups = $USERINFO['grps'] ?? [];

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the Chat Model
     *
     * The class is looked up below the plugin's Model namespace using the 'model' config option.
     * The instance is created once and reused afterwards.
     *
     * @return ChatInterface
     * @throws \RuntimeException when the configured class does not exist or is no chat model
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

        if (!class_exists($class)) {
            throw new \RuntimeException('Configured model not found: ' . $class);
        }
        // an existing class that is no chat model (e.g. an embedding model) must not be used here
        if (!is_subclass_of($class, ChatInterface::class)) {
            throw new \RuntimeException('Configured model is not a chat model: ' . $class);
        }

        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
        // the full plugin configuration array is handed to the model constructor
        $this->chatModel = new $class($this->conf);

        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * The instance is created once and reused afterwards.
     *
     * @return EmbeddingInterface
     */
    public function getEmbedModel()
    {
        if (!$this->embedModel instanceof EmbeddingInterface) {
            // FIXME this is hardcoded to OpenAI for now
            $this->embedModel = new EmbeddingAda02($this->conf);
        }

        return $this->embedModel;
    }


    /**
     * Access the Embeddings interface
     *
     * Wires chat model, embedding model and storage together. The instance is created once and
     * reused afterwards. A logger set via setLogger() before the first call is passed on.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings($this->getChatModel(), $this->getEmbedModel(), $this->getStorage());
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first configured option in this order: 'pinecone_apikey',
     * 'chroma_baseurl', 'qdrant_baseurl'. Without any of them the SQLite storage is used.
     * The instance is created once and reused afterwards. A logger set via setLogger() before
     * the first call is passed on.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        if ($this->getConf('pinecone_apikey')) {
            $this->storage = new PineconeStorage();
        } elseif ($this->getConf('chroma_baseurl')) {
            $this->storage = new ChromaStorage();
        } elseif ($this->getConf('qdrant_baseurl')) {
            $this->storage = new QdrantStorage();
        } else {
            $this->storage = new SQLiteStorage();
        }

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question and the most
     * recent exchange is passed on as the previous interaction. Without a history the question
     * is asked as is.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if (!$history) {
            return $this->askQuestion($question, []);
        }

        $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        $prev = end($history); // the most recent [user, ai] pair

        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Looks up chunks similar to the question and uses them as context of the 'question' prompt.
     * When no chunks are found the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());

        if ($similar) {
            // each chunk is wrapped into its own fenced block
            $fenced = array_map(
                static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n",
                $similar
            );
            $prompt = $this->getPrompt('question', [
                'context' => implode("\n", $fenced),
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer') . ' ' . $this->getLanguagePrompt();
        }

        // Message order: previous user message, previous assistant message, system prompt, question.
        // The system prompt intentionally keeps its position directly in front of the current question.
        $messages = [];
        if ($previous) {
            $messages[] = [
                'role' => 'user',
                'content' => $previous[0]
            ];
            $messages[] = [
                'role' => 'assistant',
                'content' => $previous[1]
            ];
        }
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];

        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * The history is added from the newest to the oldest exchange. An exchange is only added when
     * the resulting history text still fits into the chat model's rephrasing token limit. If not
     * even the newest exchange fits, the question is rephrased with an empty history.
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // go back in history as far as possible without hitting the token limit
        $chatHistory = '';
        if ($history) {
            $encoder = $this->getEmbeddings()->getTokenEncoder();
            $maxTokens = $this->getChatModel()->getMaxRephrasingTokenLength();

            foreach (array_reverse($history) as $row) {
                $candidate =
                    "Human: " . $row[0] . "\n" .
                    "Assistant: " . $row[1] . "\n" .
                    $chatHistory;

                // check before accepting the row, otherwise the limit is exceeded by a full exchange
                if (count($encoder->encode($candidate)) > $maxTokens) {
                    break;
                }

                $chatHistory = $candidate;
            }
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getChatModel()->getAnswer($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * The template file is located via localFN('prompt_' . $type). Each variable replaces the
     * placeholder {{NAME}}, where NAME is the upper cased key.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the template can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $file = $this->localFN('prompt_' . $type);
        $template = file_get_contents($file);
        if ($template === false) {
            // never talk to the model with an empty prompt
            throw new \RuntimeException('Could not read prompt template: ' . $file);
        }

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When 'preferUIlanguage' is above AIChat::LANG_AUTO_ALL and the name of the wiki language is
     * known, that language is enforced. Otherwise the model is told to follow the user's language,
     * with the wiki language (or English when its name is unknown) as the fallback.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        // null when the configured wiki language has no entry in the language list
        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $currentLang = $uiLanguage ?? 'English';

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file. The result of the write
     * is not checked.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array when the file is missing, unreadable or
     *               does not contain a JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $raw = file_get_contents($this->runDataFile);
        if ($raw === false) {
            return [];
        }

        // json_decode() yields null on broken JSON and scalars for scalar documents
        $data = json_decode($raw, true);
        return is_array($data) ? $data : [];
    }
}
359