xref: /plugin/aichat/helper.php (revision 4c0099a889ba3b789f9c81b3cd963aadb567ea68)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\AbstractModel;
9use dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
10use dokuwiki\plugin\aichat\Storage\AbstractStorage;
11use dokuwiki\plugin\aichat\Storage\ChromaStorage;
12use dokuwiki\plugin\aichat\Storage\PineconeStorage;
13use dokuwiki\plugin\aichat\Storage\QdrantStorage;
14use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
15
16/**
17 * DokuWiki Plugin aichat (Helper Component)
18 *
19 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
20 * @author  Andreas Gohr <gohr@cosmocode.de>
21 */
22class helper_plugin_aichat extends Plugin
23{
24    /** @var CLIPlugin $logger */
25    protected $logger;
26    /** @var AbstractModel */
27    protected $model;
28    /** @var Embeddings */
29    protected $embeddings;
30    /** @var AbstractStorage */
31    protected $storage;
32
33    /**
34     * Constructor. Initializes vendor autoloader
35     */
36    public function __construct()
37    {
38        require_once __DIR__ . '/vendor/autoload.php';
39    }
40
41    /**
42     * Use the given CLI plugin for logging
43     *
44     * @param CLIPlugin $logger
45     * @return void
46     */
47    public function setLogger($logger)
48    {
49        $this->logger = $logger;
50    }
51
52    /**
53     * Check if the current user is allowed to use the plugin (if it has been restricted)
54     *
55     * @return bool
56     */
57    public function userMayAccess()
58    {
59        global $auth;
60        global $USERINFO;
61        global $INPUT;
62
63        if (!$auth) return true;
64        if (!$this->getConf('restrict')) return true;
65        if (!isset($USERINFO)) return false;
66
67        return auth_isMember($this->getConf('restrict'), $INPUT->server->str('REMOTE_USER'), $USERINFO['grps']);
68    }
69
70    /**
71     * Access the OpenAI client
72     *
73     * @return GPT35Turbo
74     */
75    public function getModel()
76    {
77        if (!$this->model instanceof AbstractModel) {
78            $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');
79
80            if (!class_exists($class)) {
81                throw new \RuntimeException('Configured model not found: ' . $class);
82            }
83            // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
84            $this->model = new $class([
85                'key' => $this->getConf('openaikey'),
86                'org' => $this->getConf('openaiorg')
87            ]);
88        }
89
90        return $this->model;
91    }
92
93    /**
94     * Access the Embeddings interface
95     *
96     * @return Embeddings
97     */
98    public function getEmbeddings()
99    {
100        if (!$this->embeddings instanceof Embeddings) {
101            $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
102            if ($this->logger) {
103                $this->embeddings->setLogger($this->logger);
104            }
105        }
106
107        return $this->embeddings;
108    }
109
110    /**
111     * Access the Storage interface
112     *
113     * @return AbstractStorage
114     */
115    public function getStorage()
116    {
117        if (!$this->storage instanceof AbstractStorage) {
118            if ($this->getConf('pinecone_apikey')) {
119                $this->storage = new PineconeStorage();
120            } elseif ($this->getConf('chroma_baseurl')) {
121                $this->storage = new ChromaStorage();
122            } elseif ($this->getConf('qdrant_baseurl')) {
123                $this->storage = new QdrantStorage();
124            } else {
125                $this->storage = new SQLiteStorage();
126            }
127
128            if ($this->logger) {
129                $this->storage->setLogger($this->logger);
130            }
131        }
132
133        return $this->storage;
134    }
135
136    /**
137     * Ask a question with a chat history
138     *
139     * @param string $question
140     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
141     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
142     * @throws Exception
143     */
144    public function askChatQuestion($question, $history = [])
145    {
146        if ($history) {
147            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
148            $prev = end($history);
149        } else {
150            $standaloneQuestion = $question;
151            $prev = [];
152        }
153        return $this->askQuestion($standaloneQuestion, $prev);
154    }
155
156    /**
157     * Ask a single standalone question
158     *
159     * @param string $question
160     * @param array $previous [user, ai] of the previous question
161     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
162     * @throws Exception
163     */
164    public function askQuestion($question, $previous = [])
165    {
166        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
167        if ($similar) {
168            $context = implode(
169                "\n",
170                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
171            );
172            $prompt = $this->getPrompt('question', [
173                'context' => $context,
174                'language' => $this->getLanguagePrompt()
175            ]);
176        } else {
177            $prompt = $this->getPrompt('noanswer');
178        }
179
180        $messages = [
181            [
182                'role' => 'system',
183                'content' => $prompt
184            ],
185            [
186                'role' => 'user',
187                'content' => $question
188            ]
189        ];
190
191        if ($previous) {
192            array_unshift($messages, [
193                'role' => 'assistant',
194                'content' => $previous[1]
195            ]);
196            array_unshift($messages, [
197                'role' => 'user',
198                'content' => $previous[0]
199            ]);
200        }
201
202        $answer = $this->getModel()->getAnswer($messages);
203
204        return [
205            'question' => $question,
206            'answer' => $answer,
207            'sources' => $similar,
208        ];
209    }
210
211    /**
212     * Rephrase a question into a standalone question based on the chat history
213     *
214     * @param string $question The original user question
215     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
216     * @return string The rephrased question
217     * @throws Exception
218     */
219    public function rephraseChatQuestion($question, $history)
220    {
221        // go back in history as far as possible without hitting the token limit
222        $chatHistory = '';
223        $history = array_reverse($history);
224        foreach ($history as $row) {
225            if (
226                count($this->getEmbeddings()->getTokenEncoder()->encode($chatHistory)) >
227                $this->getModel()->getMaxRephrasingTokenLength()
228            ) {
229                break;
230            }
231
232            $chatHistory =
233                "Human: " . $row[0] . "\n" .
234                "Assistant: " . $row[1] . "\n" .
235                $chatHistory;
236        }
237
238        // ask openAI to rephrase the question
239        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
240        $messages = [['role' => 'user', 'content' => $prompt]];
241        return $this->getModel()->getRephrasedQuestion($messages);
242    }
243
244    /**
245     * Load the given prompt template and fill in the variables
246     *
247     * @param string $type
248     * @param string[] $vars
249     * @return string
250     */
251    protected function getPrompt($type, $vars = [])
252    {
253        $template = file_get_contents($this->localFN('prompt_' . $type));
254
255        $replace = [];
256        foreach ($vars as $key => $val) {
257            $replace['{{' . strtoupper($key) . '}}'] = $val;
258        }
259
260        return strtr($template, $replace);
261    }
262
263    /**
264     * Construct the prompt to define the answer language
265     *
266     * @return string
267     */
268    protected function getLanguagePrompt()
269    {
270        global $conf;
271
272        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
273            $isoLangnames = include(__DIR__ . '/lang/languages.php');
274            if (isset($isoLangnames[$conf['lang']])) {
275                $languagePrompt = 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
276                return $languagePrompt;
277            }
278        }
279
280        $languagePrompt = 'Always answer in the user\'s language.';
281        return $languagePrompt;
282    }
283
284    /**
285     * Should sources be limited to current language?
286     *
287     * @return string The current language code or empty string
288     */
289    public function getLanguageLimit()
290    {
291        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
292            global $conf;
293            return $conf['lang'];
294        } else {
295            return '';
296        }
297    }
298}
299