xref: /plugin/aichat/helper.php (revision f8d5ae013d1e8cb3669240e961cb98f1d60a5931)
10337f47fSAndreas Gohr<?php
20337f47fSAndreas Gohr
37ebc7895Ssplitbrainuse dokuwiki\Extension\Plugin;
43379af09SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
5e33a1d7aSAndreas Gohruse dokuwiki\plugin\aichat\AIChat;
6f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
70337f47fSAndreas Gohruse dokuwiki\plugin\aichat\Embeddings;
8754b8394SAndreas Gohruse dokuwiki\plugin\aichat\Model\AbstractModel;
9f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
1001f06932SAndreas Gohruse dokuwiki\plugin\aichat\Storage\AbstractStorage;
1113dbfc23SAndreas Gohruse dokuwiki\plugin\aichat\Storage\PineconeStorage;
12f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Storage\SQLiteStorage;
130337f47fSAndreas Gohr
/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point of the plugin: lazily creates the configured model, the
 * embeddings interface and the storage backend, and implements the question
 * answering flow on top of them.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null $logger optional CLI plugin that receives log output */
    protected $logger;
    /** @var AbstractModel|null lazily created by getModel() */
    protected $model;
    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /**
     * Constructor. Initializes vendor autoloader
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is remembered for components that are created later and is also
     * handed to the storage and embeddings objects that already exist, so the
     * result does not depend on whether this is called before or after the first
     * getStorage()/getEmbeddings() call.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;

        if (!$logger) return;

        if ($this->storage instanceof AbstractStorage) {
            $this->storage->setLogger($logger);
        }
        if ($this->embeddings instanceof Embeddings) {
            $this->embeddings->setLogger($logger);
        }
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict'
     * option is empty. Otherwise the current user has to match the configured
     * restriction list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // user info without a group list is treated as "member of no group"
        $groups = (array)($USERINFO['grps'] ?? []);

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the configured model
     *
     * The class name is taken from the 'model' option and resolved relative to the
     * plugin's Model namespace. The instance is created once and then reused.
     *
     * @return AbstractModel
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getModel()
    {
        if (!$this->model instanceof AbstractModel) {
            $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

            if (!class_exists($class)) {
                throw new \RuntimeException('Configured model not found: ' . $class);
            }
            // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
            $this->model = new $class([
                'key' => $this->getConf('openaikey'),
                'org' => $this->getConf('openaiorg')
            ]);
        }

        return $this->model;
    }

    /**
     * Access the Embeddings interface
     *
     * Created on first use from the model and the storage; the logger is passed on
     * if one has been set.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if (!$this->embeddings instanceof Embeddings) {
            $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
            if ($this->logger) {
                $this->embeddings->setLogger($this->logger);
            }
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * Pinecone is used when the 'pinecone_apikey' option is set, SQLite otherwise.
     * The logger is passed on if one has been set.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if (!$this->storage instanceof AbstractStorage) {
            if ($this->getConf('pinecone_apikey')) {
                $this->storage = new PineconeStorage();
            } else {
                $this->storage = new SQLiteStorage();
            }

            if ($this->logger) {
                $this->storage->setLogger($this->logger);
            }
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question
     * and the most recent exchange is passed along to askQuestion(). Note that the
     * 'question' in the result is the question that was actually asked, i.e. the
     * rephrased one when a history was given.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
            $prev = end($history);
        } else {
            $standaloneQuestion = $question;
            $prev = [];
        }
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Looks up chunks similar to the question and uses them as context of the
     * 'question' prompt. Without any similar chunk the 'noanswer' prompt is used.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode("\n", array_map(static function (Chunk $chunk) {
                return "\n```\n" . $chunk->getText() . "\n```\n";
            }, $similar));
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
        }

        $messages = [
            [
                'role' => 'system',
                'content' => $prompt
            ],
            [
                'role' => 'user',
                'content' => $question
            ]
        ];

        if ($previous) {
            // prepend the previous exchange: the resulting order is
            // previous user, previous assistant, system, user
            array_unshift($messages, [
                'role' => 'assistant',
                'content' => $previous[1]
            ]);
            array_unshift($messages, [
                'role' => 'user',
                'content' => $previous[0]
            ]);
        }

        $answer = $this->getModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * The history is walked from the newest to the oldest exchange. An exchange is
     * only added when the history including it still fits into the model's
     * rephrasing token limit, so the history sent to the model never exceeds that
     * limit. If even the newest exchange does not fit, the history stays empty.
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // both are the same for every row, so look them up only once
        $encoder = $this->getEmbeddings()->getTokenEncoder();
        $maxTokens = $this->getModel()->getMaxRephrasingTokenLength();

        // go back in history as far as possible without exceeding the token limit
        $chatHistory = '';
        foreach (array_reverse($history) as $row) {
            $candidate =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;

            // check before accepting the row, otherwise the result could be over the limit
            if (count($encoder->encode($candidate)) > $maxTokens) {
                break;
            }

            $chatHistory = $candidate;
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the placeholder {{NAME}}, where NAME is the upper
     * cased key of the variable.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the prompt template can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $file = $this->localFN('prompt_' . $type);

        // an unreadable template must not silently turn into an empty prompt
        $template = is_file($file) ? file_get_contents($file) : false;
        if ($template === false) {
            throw new \RuntimeException('Failed to load prompt template: ' . $file);
        }

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the
     * interface language is listed in lang/languages.php, that language is
     * requested explicitly. In all other cases the model is told to answer in the
     * user's language.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            $isoLangnames = include(__DIR__ . '/lang/languages.php');
            if (isset($isoLangnames[$conf['lang']])) {
                return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
            }
        }

        return 'Always answer in the user\'s language.';
    }

    /**
     * Should sources be limited to current language?
     *
     * The limit applies when the 'preferUIlanguage' option is at least
     * AIChat::LANG_UI_LIMITED.
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            return $conf['lang'];
        }

        return '';
    }
}
292