xref: /plugin/aichat/helper.php (revision 441edf84da4c031892d23b5809b2adfb59c6d774)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\Extension\Plugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\AbstractModel;
9use dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
10use dokuwiki\plugin\aichat\Storage\AbstractStorage;
11use dokuwiki\plugin\aichat\Storage\ChromaStorage;
12use dokuwiki\plugin\aichat\Storage\PineconeStorage;
13use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
14
15/**
16 * DokuWiki Plugin aichat (Helper Component)
17 *
18 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
19 * @author  Andreas Gohr <gohr@cosmocode.de>
20 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null $logger CLI plugin used for logging, unset until setLogger() is called */
    protected $logger;
    /** @var AbstractModel|null lazily created by getModel() */
    protected $model;
    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /**
     * Constructor. Initializes vendor autoloader
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed to the Embeddings and Storage objects at the time they are
     * created by getEmbeddings() and getStorage().
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict' option
     * is empty. Otherwise the current user has to match the configured restriction.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $restrict,
            $INPUT->server->str('REMOTE_USER'),
            // fall back to "no groups" when the user info carries no group list
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the configured language model
     *
     * The model class is taken from the 'model' option and instantiated on first use.
     *
     * @return AbstractModel
     * @throws \RuntimeException when the configured class does not exist or is no AbstractModel
     */
    public function getModel()
    {
        if (!$this->model instanceof AbstractModel) {
            $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

            if (!class_exists($class)) {
                throw new \RuntimeException('Configured model not found: ' . $class);
            }
            // anything else would never satisfy the instanceof check above and be rebuilt on every call
            if (!is_subclass_of($class, AbstractModel::class)) {
                throw new \RuntimeException('Configured model is not a valid model: ' . $class);
            }

            // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
            $this->model = new $class([
                'key' => $this->getConf('openaikey'),
                'org' => $this->getConf('openaiorg')
            ]);
        }

        return $this->model;
    }

    /**
     * Access the Embeddings interface
     *
     * Created on first use from the current model and storage.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if (!$this->embeddings instanceof Embeddings) {
            $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
            if ($this->logger) {
                $this->embeddings->setLogger($this->logger);
            }
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked from the configuration: Pinecone when 'pinecone_apikey' is set,
     * else Chroma when 'chroma_baseurl' is set, else SQLite.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if (!$this->storage instanceof AbstractStorage) {
            if ($this->getConf('pinecone_apikey')) {
                $this->storage = new PineconeStorage();
            } elseif ($this->getConf('chroma_baseurl')) {
                $this->storage = new ChromaStorage();
            } else {
                $this->storage = new SQLiteStorage();
            }

            if ($this->logger) {
                $this->storage->setLogger($this->logger);
            }
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question and the
     * most recent exchange is passed along as conversational context.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
            $prev = end($history);
        } else {
            $standaloneQuestion = $question;
            $prev = [];
        }
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Looks up chunks similar to the question and uses them as context for the 'question'
     * prompt. Without any similar chunks the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
        }

        // Message order: previous exchange (if any), then the system prompt, then the question
        $messages = [];
        if ($previous) {
            $messages[] = ['role' => 'user', 'content' => $previous[0]];
            $messages[] = ['role' => 'assistant', 'content' => $previous[1]];
        }
        $messages[] = ['role' => 'system', 'content' => $prompt];
        $messages[] = ['role' => 'user', 'content' => $question];

        $answer = $this->getModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * As many of the most recent exchanges as fit into the model's rephrasing token limit
     * are included. An exchange that would push the history over the limit is left out,
     * together with everything older. If even the latest exchange does not fit, the
     * history passed to the prompt is empty.
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // both are loop invariant
        $encoder = $this->getEmbeddings()->getTokenEncoder();
        $maxTokens = $this->getModel()->getMaxRephrasingTokenLength();

        // go back in history as far as possible without hitting the token limit
        $chatHistory = '';
        foreach (array_reverse($history) as $row) {
            // older rows are prepended so the final text reads chronologically
            $candidate =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;

            // check before accepting the row, so the result never exceeds the limit
            if (count($encoder->encode($candidate)) > $maxTokens) {
                break;
            }

            $chatHistory = $candidate;
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the placeholder {{NAME}} where NAME is the upper cased key.
     *
     * @param string $type name of the prompt, the file is looked up as 'prompt_' . $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the template file can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $file = $this->localFN('prompt_' . $type);
        $template = file_get_contents($file);
        if ($template === false) {
            // do not silently continue with an empty prompt
            throw new \RuntimeException('Could not load prompt template: ' . $file);
        }

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the wiki's
     * language has an entry in lang/languages.php, that language is requested explicitly.
     * In all other cases the model is told to answer in the user's language.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            $isoLangnames = include(__DIR__ . '/lang/languages.php');
            if (isset($isoLangnames[$conf['lang']])) {
                return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
            }
        }

        return 'Always answer in the user\'s language.';
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            return $conf['lang'];
        }

        return '';
    }
}
296