xref: /plugin/aichat/helper.php (revision cfaf6b321bcb5d5ea077a134a5f8812a379d20c1)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\AbstractModel;
9use dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
10use dokuwiki\plugin\aichat\Storage\AbstractStorage;
11use dokuwiki\plugin\aichat\Storage\ChromaStorage;
12use dokuwiki\plugin\aichat\Storage\PineconeStorage;
13use dokuwiki\plugin\aichat\Storage\QdrantStorage;
14use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
15
16/**
17 * DokuWiki Plugin aichat (Helper Component)
18 *
19 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
20 * @author  Andreas Gohr <gohr@cosmocode.de>
21 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null optional CLI plugin used for logging, see setLogger() */
    protected $logger;
    /** @var AbstractModel lazily created by getModel() */
    protected $model;
    /** @var Embeddings lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file holding meta data on the last run */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader and the run data file location
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the embeddings and storage objects when they are created,
     * so it has to be set before the first call to getEmbeddings() or getStorage().
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or the 'restrict' setting is empty.
     * Otherwise the current user has to match the 'restrict' member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // user info without a group list is treated as "member of no group"
        $groups = $USERINFO['grps'] ?? [];

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the configured model
     *
     * The class is looked up below the plugin's Model namespace using the 'model' setting
     * and is instantiated once on first use.
     *
     * @return AbstractModel
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getModel()
    {
        if (!$this->model instanceof AbstractModel) {
            $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

            if (!class_exists($class)) {
                throw new \RuntimeException('Configured model not found: ' . $class);
            }
            // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
            $this->model = new $class([
                'key' => $this->getConf('openaikey'),
                'org' => $this->getConf('openaiorg')
            ]);
        }

        return $this->model;
    }

    /**
     * Access the Embeddings interface
     *
     * Created once on first use from the configured model and storage.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if (!$this->embeddings instanceof Embeddings) {
            $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
            if ($this->logger) {
                $this->embeddings->setLogger($this->logger);
            }
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first configured setting in this order:
     * Pinecone API key, Chroma base URL, Qdrant base URL. SQLite is used when none is set.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if (!$this->storage instanceof AbstractStorage) {
            if ($this->getConf('pinecone_apikey')) {
                $this->storage = new PineconeStorage();
            } elseif ($this->getConf('chroma_baseurl')) {
                $this->storage = new ChromaStorage();
            } elseif ($this->getConf('qdrant_baseurl')) {
                $this->storage = new QdrantStorage();
            } else {
                $this->storage = new SQLiteStorage();
            }

            if ($this->logger) {
                $this->storage->setLogger($this->logger);
            }
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question and the
     * most recent exchange is passed along as context. Without one, the question is asked as is.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
            $prev = end($history);
        } else {
            $standaloneQuestion = $question;
            $prev = [];
        }
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Looks up chunks similar to the question and passes them as context in the system prompt.
     * When no chunks are found, the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer') . ' ' . $this->getLanguagePrompt();
        }

        $messages = [
            [
                'role' => 'system',
                'content' => $prompt
            ],
            [
                'role' => 'user',
                'content' => $question
            ]
        ];

        if ($previous) {
            // prepended in reverse so the final order is:
            // previous user message, previous assistant message, system prompt, current question
            array_unshift($messages, [
                'role' => 'assistant',
                'content' => $previous[1]
            ]);
            array_unshift($messages, [
                'role' => 'user',
                'content' => $previous[0]
            ]);
        }

        $answer = $this->getModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // go back in history, newest exchange first, until the collected text passes the token limit
        // the limit is checked before each exchange is added, so the last one added may exceed it
        $chatHistory = '';
        $history = array_reverse($history);
        foreach ($history as $row) {
            if (
                count($this->getEmbeddings()->getTokenEncoder()->encode($chatHistory)) >
                $this->getModel()->getMaxRephrasingTokenLength()
            ) {
                break;
            }

            // older exchanges are put in front to keep the text in chronological order
            $chatHistory =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces its upper cased {{NAME}} placeholder in the template.
     *
     * @param string $type The template name, loaded as 'prompt_' . $type
     * @param string[] $vars placeholder name => replacement
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When 'preferUIlanguage' is above AIChat::LANG_AUTO_ALL and the interface language has a
     * known name, that language is enforced. Otherwise the model is told to answer in the
     * user's language, falling back to the interface language (or English if it is unknown).
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        // name of the interface language, null if it is not listed
        $uiLanguage = $isoLangnames[$conf['lang']] ?? null;

        if ($uiLanguage !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $uiLanguage . '.';
        }

        $currentLang = $uiLanguage ?? 'English';

        // the trailing space keeps the two sentences apart
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, empty when the file is missing, unreadable or not a JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode() returns null on invalid input and scalars for scalar documents
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}
331