<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\ModelFactory;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central entry point of the plugin: gives access to the configured models,
 * the embeddings and the storage, and implements the question/answer flow.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory */
    public $factory;

    /** @var CLIPlugin|null optional logger, handed on to embeddings and storage */
    protected $logger;

    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;

    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file holding meta data on the last run */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * Only objects created after this call (embeddings, storage) receive the logger.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Update the configuration
     *
     * The given values are merged over the current plugin configuration and
     * passed on to the model factory.
     *
     * @param array $config
     * @return void
     */
    public function updateConfig(array $config)
    {
        $this->conf = array_merge($this->conf, $config);
        $this->factory->updateConfig($config);
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        // without an auth backend or a configured restriction everybody may access
        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        // a restriction is configured but no user info is available
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            $USERINFO['grps']
        );
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The instance is created on first use and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is derived from the 'storage' config setting. The
     * instance is created on first use and reused afterwards.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When there is a history and the 'rephraseHistory' setting is greater than
     * zero, the question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'question' => $question,
            ]);
        } else {
            // no similar chunks found: use the 'noanswer' prompt and drop the history
            $prompt = $this->getPrompt('noanswer', [
                'question' => $question,
            ]);
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(),
            $prompt,
            $history,
            $this->getConf('chatHistory')
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase', [
            'question' => $question,
        ]);
        $messages = $this->prepareMessages(
            $this->getRephraseModel(),
            $prompt,
            $history,
            $this->getConf('rephraseHistory')
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * @param ChatInterface $model The used model
     * @param string $promptedQuestion The user question embedded in a prompt
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $promptedQuestion,
        array $history,
        int $historySize
    ): array {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($promptedQuestion);
        // 5% safety margin, rounded up to a whole token: historyMessages() expects an
        // integer limit and implicit fractional float-to-int conversion is deprecated
        $safetyMargin = (int) ceil($remainingContext * 0.05);
        $remainingContext -= $safetyMargin;
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext, $historySize);
        $messages[] = [
            'role' => 'user',
            'content' => $promptedQuestion
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of messages to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history from newest to oldest so the most recent exchanges win
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse (assistant first), the final array_reverse() restores
            // chronological order with the user message before the assistant reply
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the upper-cased placeholder {{NAME}} in the template.
     * The 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN($type, 'prompt'));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        // enforce the UI language only when it is configured and its name is known
        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            if (isset($isoLangnames[$conf['lang']])) {
                $languagePrompt = 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
                return $languagePrompt;
            }
        }

        $languagePrompt = 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
        return $languagePrompt;
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data; empty when the file is missing, unreadable or not a JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode() yields null on invalid input; always honor the array contract
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}