<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\ModelFactory;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central entry point of the plugin: gives access to the configured models, the
 * embeddings and the storage backend, and implements the question/answer flow.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory creates the chat, rephrase and embedding models */
    public $factory;

    /** @var CLIPlugin|null $logger optional CLI plugin used for logging */
    protected $logger;

    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;

    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file holding meta data on the last run */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader, the configuration and the model factory
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the embeddings and storage objects when they are
     * created, so it has to be set before those are accessed for the first time.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict'
     * option is empty. Otherwise the user has to match the 'restrict' member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            // a user record without groups must not trigger an undefined index notice
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The object is created on first use and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is derived from the 'storage' option and is created on
     * first use.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given and the 'rephraseHistory' option is greater than zero,
     * the question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each matching chunk is passed to the model as its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            // nothing matched: use the 'noanswer' prompt and drop the history
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(),
            $prompt,
            $question,
            $history,
            // config values are not guaranteed to be integers, normalize explicitly
            (int)$this->getConf('chatHistory')
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $messages = $this->prepareMessages(
            $this->getRephraseModel(),
            $prompt,
            $question,
            $history,
            // config values are not guaranteed to be integers, normalize explicitly
            (int)$this->getConf('rephraseHistory')
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The result consists of the history messages that fit, followed by the system
     * prompt and finally the user question.
     *
     * @param ChatInterface $model The used model
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of history entries to use
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $prompt,
        string $question,
        array $history,
        int $historySize
    ): array {
        // calculate the space for context
        $remainingContext = (int)$model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);
        // 5% safety margin; kept as integer because historyMessages() expects an int limit
        // and implicit lossy float to int conversion is deprecated since PHP 8.1
        $safetyMargin = (int)($remainingContext * 0.05);
        $remainingContext -= $safetyMargin;
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext, $historySize);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The newest entries
     * are preferred, the returned messages are in chronological order again.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of history entries to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history from newest to oldest
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse (answer first), the final array_reverse() restores user -> assistant
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Placeholders have the form {{NAME}} where NAME is the upper cased variable key.
     * The 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type The prompt type, loaded via localFN('prompt_' . $type)
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        // name of the wiki language, English when the language code is unknown
        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        // enforce the wiki language only when it is configured and the language is known
        if (
            $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL &&
            isset($isoLangnames[$conf['lang']])
        ) {
            return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
        }

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, empty when there is none or it can not be read
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $raw = file_get_contents($this->runDataFile);
        if ($raw === false) {
            return [];
        }

        // json_decode() returns null for broken content, callers rely on an array
        $data = json_decode($raw, true);
        return is_array($data) ? $data : [];
    }
}