xref: /plugin/aichat/helper.php (revision f8d5ae013d1e8cb3669240e961cb98f1d60a5931)
1<?php
2
3use dokuwiki\Extension\Plugin;
4use dokuwiki\Extension\CLIPlugin;
5use dokuwiki\plugin\aichat\AIChat;
6use dokuwiki\plugin\aichat\Chunk;
7use dokuwiki\plugin\aichat\Embeddings;
8use dokuwiki\plugin\aichat\Model\AbstractModel;
9use dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
10use dokuwiki\plugin\aichat\Storage\AbstractStorage;
11use dokuwiki\plugin\aichat\Storage\PineconeStorage;
12use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
13
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point of the plugin: lazily creates the configured model, the
 * embeddings interface and the vector storage, and implements the question
 * answering flow on top of them.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null optional CLI plugin used for logging, see setLogger() */
    protected $logger;
    /** @var AbstractModel|null lazily created by getModel() */
    protected $model;
    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /**
     * Constructor. Initializes vendor autoloader
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the embeddings and storage objects when they are
     * created, so it has to be set before getEmbeddings()/getStorage() are first called.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict'
     * setting is empty. Otherwise the current user and their groups are matched
     * against the 'restrict' setting.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // $USERINFO may be set without carrying group information (e.g. when it is
        // not an array); fall back to an empty group list instead of passing null on
        $groups = isset($USERINFO['grps']) ? (array)$USERINFO['grps'] : [];

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the configured model
     *
     * The model class is built from the 'model' setting, relative to the plugin's
     * Model namespace. The instance is created once and reused afterwards.
     *
     * @return AbstractModel
     * @throws \RuntimeException when the configured class does not exist or is no model
     */
    public function getModel()
    {
        if ($this->model instanceof AbstractModel) {
            return $this->model;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

        if (!class_exists($class)) {
            throw new \RuntimeException('Configured model not found: ' . $class);
        }
        // only ever instantiate real model implementations from the configured name
        if (!is_subclass_of($class, AbstractModel::class)) {
            throw new \RuntimeException('Configured model is not a valid model class: ' . $class);
        }

        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
        $this->model = new $class([
            'key' => $this->getConf('openaikey'),
            'org' => $this->getConf('openaiorg')
        ]);

        return $this->model;
    }

    /**
     * Access the Embeddings interface
     *
     * Created once from the model and the storage; receives the logger if one was set.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * Uses the Pinecone storage when a 'pinecone_apikey' is configured and the
     * SQLite storage otherwise. Created once; receives the logger if one was set.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $this->storage = $this->getConf('pinecone_apikey')
            ? new PineconeStorage()
            : new SQLiteStorage();

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question
     * and the most recent exchange is passed along as the previous conversation turn.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if (!$history) {
            return $this->askQuestion($question, []);
        }

        $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        // $history is a local copy, so moving its internal pointer has no outside effect
        $prev = end($history);

        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Looks up chunks similar to the question and uses them as context for the
     * 'question' prompt. When nothing similar is found, the 'noanswer' prompt is
     * used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode("\n", array_map(function (Chunk $chunk) {
                return "\n```\n" . $chunk->getText() . "\n```\n";
            }, $similar));
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
        }

        $messages = [
            [
                'role' => 'system',
                'content' => $prompt
            ],
            [
                'role' => 'user',
                'content' => $question
            ]
        ];

        if ($previous) {
            // both entries are prepended, so the final order is:
            // previous user message, previous assistant answer, system prompt, current question
            array_unshift($messages, [
                'role' => 'assistant',
                'content' => $previous[1]
            ]);
            array_unshift($messages, [
                'role' => 'user',
                'content' => $previous[0]
            ]);
        }

        $answer = $this->getModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * The history is walked from the newest exchange backwards. Older exchanges are
     * only added as long as the resulting history stays within the model's
     * rephrasing token limit. The most recent exchange is always included.
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $maxTokens = $this->getModel()->getMaxRephrasingTokenLength();

        // go back in history as far as possible without hitting the token limit
        $chatHistory = '';
        foreach (array_reverse($history) as $row) {
            $candidate =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;

            // measure the history *including* the next older exchange, so the limit is
            // not overshot by a whole exchange; the newest exchange is never dropped
            if (
                $chatHistory !== '' &&
                count($this->getEmbeddings()->getTokenEncoder()->encode($candidate)) > $maxTokens
            ) {
                break;
            }

            $chatHistory = $candidate;
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * The template is read from the localized file 'prompt_<type>'. Each variable
     * replaces the placeholder {{NAME}}, where NAME is the upper-cased key.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' setting is above AIChat::LANG_AUTO_ALL and the
     * wiki language has a known name in lang/languages.php, that language is
     * requested. In all other cases the model is told to use the user's language.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            $isoLangnames = include(__DIR__ . '/lang/languages.php');
            if (isset($isoLangnames[$conf['lang']])) {
                return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
            }
        }

        return 'Always answer in the user\'s language.';
    }

    /**
     * Should sources be limited to current language?
     *
     * The limit applies when the 'preferUIlanguage' setting is at least
     * AIChat::LANG_UI_LIMITED.
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            return $conf['lang'];
        }

        return '';
    }
}
292