xref: /plugin/aichat/helper.php (revision e75dc39f0eaa9a83a7bf54421b7183cdc40b952d)
10337f47fSAndreas Gohr<?php
20337f47fSAndreas Gohr
33379af09SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
45e6dd16eSAndreas Gohruse dokuwiki\Extension\Plugin;
5e33a1d7aSAndreas Gohruse dokuwiki\plugin\aichat\AIChat;
6f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
70337f47fSAndreas Gohruse dokuwiki\plugin\aichat\Embeddings;
8754b8394SAndreas Gohruse dokuwiki\plugin\aichat\Model\AbstractModel;
9f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
1001f06932SAndreas Gohruse dokuwiki\plugin\aichat\Storage\AbstractStorage;
115e6dd16eSAndreas Gohruse dokuwiki\plugin\aichat\Storage\ChromaStorage;
1213dbfc23SAndreas Gohruse dokuwiki\plugin\aichat\Storage\PineconeStorage;
134c0099a8SAndreas Gohruse dokuwiki\plugin\aichat\Storage\QdrantStorage;
14f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Storage\SQLiteStorage;
150337f47fSAndreas Gohr
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Gives lazy access to the configured language model, the embeddings handler and the
 * vector storage, and implements the question answering flow on top of them. It also
 * persists a small JSON file with meta data about the last run.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null optional CLI plugin used for logging, see setLogger() */
    protected $logger;
    /** @var AbstractModel|null created on first use by getModel() */
    protected $model;
    /** @var Embeddings|null created on first use by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage|null created on first use by getStorage() */
    protected $storage;

    /** @var string path of the JSON file where meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader and the location of the run data file
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * Only objects created after this call receive the logger; it is handed on when the
     * embeddings and storage objects are instantiated.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when no auth backend is available or when the 'restrict'
     * option is empty. Otherwise the user has to match the configured member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // fall back to an empty group list when the user info carries no groups
        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $USERINFO['grps'] ?? []);
    }

    /**
     * Access the configured language model
     *
     * The class name is taken from the 'model' option, relative to the plugin's Model namespace.
     *
     * @return AbstractModel
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getModel()
    {
        if (!$this->model instanceof AbstractModel) {
            $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

            if (!class_exists($class)) {
                throw new \RuntimeException('Configured model not found: ' . $class);
            }
            // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
            $this->model = new $class([
                'key' => $this->getConf('openaikey'),
                'org' => $this->getConf('openaiorg')
            ]);
        }

        return $this->model;
    }

    /**
     * Access the Embeddings interface
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if (!$this->embeddings instanceof Embeddings) {
            $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
            if ($this->logger) {
                $this->embeddings->setLogger($this->logger);
            }
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first option that is set: 'pinecone_apikey',
     * 'chroma_baseurl', 'qdrant_baseurl'. Without any of them SQLite is used.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if (!$this->storage instanceof AbstractStorage) {
            if ($this->getConf('pinecone_apikey')) {
                $this->storage = new PineconeStorage();
            } elseif ($this->getConf('chroma_baseurl')) {
                $this->storage = new ChromaStorage();
            } elseif ($this->getConf('qdrant_baseurl')) {
                $this->storage = new QdrantStorage();
            } else {
                $this->storage = new SQLiteStorage();
            }

            if ($this->logger) {
                $this->storage->setLogger($this->logger);
            }
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question and the
     * most recent exchange is passed along to the model. Without one, the question is
     * asked as is.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if (!$history) {
            return $this->askQuestion($question, []);
        }

        $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        return $this->askQuestion($standaloneQuestion, end($history));
    }

    /**
     * Ask a single standalone question
     *
     * Chunks similar to the question are looked up and embedded as context in the system
     * prompt. When nothing similar is found, the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
        }

        // the previous exchange (if any) goes first, followed by the system prompt and the question
        $messages = [];
        if ($previous) {
            $messages[] = ['role' => 'user', 'content' => $previous[0]];
            $messages[] = ['role' => 'assistant', 'content' => $previous[1]];
        }
        $messages[] = ['role' => 'system', 'content' => $prompt];
        $messages[] = ['role' => 'user', 'content' => $question];

        $answer = $this->getModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * The history is added newest exchange first. An exchange is only added when the
     * resulting text still fits into the model's rephrasing token limit, so the history
     * may be empty if even the most recent exchange is too long.
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // go back in history as far as possible without exceeding the token limit
        $chatHistory = '';
        if ($history) {
            $encoder = $this->getEmbeddings()->getTokenEncoder();
            $maxTokens = $this->getModel()->getMaxRephrasingTokenLength();

            foreach (array_reverse($history) as $row) {
                $candidate =
                    "Human: " . $row[0] . "\n" .
                    "Assistant: " . $row[1] . "\n" .
                    $chatHistory;

                // measure before accepting, otherwise the last accepted row could overshoot the limit
                if (count($encoder->encode($candidate)) > $maxTokens) {
                    break;
                }
                $chatHistory = $candidate;
            }
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * The template is read from the file that localFN() resolves for 'prompt_<type>'.
     * Each variable replaces its upper cased {{KEY}} placeholder.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When 'preferUIlanguage' is above AIChat::LANG_AUTO_ALL and lang/languages.php has a
     * name for the wiki language, answers are requested in that language. In all other
     * cases the model is told to answer in the user's language.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            // the included file is expected to return a map of language code => language name
            $isoLangnames = include(__DIR__ . '/lang/languages.php');
            if (isset($isoLangnames[$conf['lang']])) {
                return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
            }
        }

        return 'Always answer in the user\'s language.';
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array the stored data, empty when no readable, valid run data exists
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        // an unreadable, empty or corrupt file must not leak null/scalars to callers expecting an array
        $data = json_decode((string) file_get_contents($this->runDataFile), true);
        return is_array($data) ? $data : [];
    }
}
328