<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\Model\OpenAI\Embedding3Small;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point for the plugin: it lazily creates the chat model, the
 * embedding model, the storage backend and the Embeddings service, and it
 * builds the message lists that are sent to the chat model.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin $logger */
    protected $logger;
    /** @var ChatInterface */
    protected $chatModel;
    /** @var EmbeddingInterface */
    protected $embedModel;
    /** @var Embeddings */
    protected $embeddings;
    /** @var AbstractStorage */
    protected $storage;

    /** @var string path of the JSON file holding meta data on the last run */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also determines where the run data is stored and loads the plugin configuration.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the storage and embeddings objects when they are
     * created, so it has to be set before those are first accessed.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict'
     * option is empty. Otherwise the current user has to match the configured restriction.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember($this->getConf('restrict'), $INPUT->server->str('REMOTE_USER'), $USERINFO['grps']);
    }

    /**
     * Access the Chat Model
     *
     * The 'chatmodel' option is split at the first space into a provider namespace
     * and a model name. The instance is created once and then reused.
     *
     * @return ChatInterface
     * @throws \RuntimeException when no ChatModel class exists for the configured namespace
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }

        [$namespace, $name] = sexplode(' ', $this->getConf('chatmodel'), 2);
        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\ChatModel';

        if (!class_exists($class)) {
            throw new \RuntimeException('No ChatModel found for ' . $namespace);
        }

        $this->chatModel = new $class($name, $this->conf);
        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * The 'embedmodel' option is split at the first space into a provider namespace
     * and a model name. The instance is created once and then reused.
     *
     * @return EmbeddingInterface
     * @throws \RuntimeException when no EmbeddingModel class exists for the configured namespace
     */
    public function getEmbedModel()
    {
        if ($this->embedModel instanceof EmbeddingInterface) {
            return $this->embedModel;
        }

        [$namespace, $name] = sexplode(' ', $this->getConf('embedmodel'), 2);
        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\EmbeddingModel';

        if (!class_exists($class)) {
            throw new \RuntimeException('No EmbeddingModel found for ' . $namespace);
        }

        $this->embedModel = new $class($name, $this->conf);
        return $this->embedModel;
    }

    /**
     * Access the Embeddings interface
     *
     * Built from the chat model, the embedding model and the storage. The instance
     * is created once and then reused.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbedModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The class is derived from the 'storage' option. The instance is created once
     * and then reused.
     *
     * @return AbstractStorage
     * @throws \RuntimeException when no Storage class exists for the configured name
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $name = $this->getConf('storage');
        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $name . 'Storage';

        // fail the same way the model factories do instead of raising a fatal Error on `new`
        if (!class_exists($class)) {
            throw new \RuntimeException('No Storage found for ' . $name);
        }

        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given, the question is first rephrased into a standalone
     * question. The returned 'question' is that standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and passed to the model as context. When nothing
     * similar is found, the 'noanswer' prompt is used and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $messages = $this->prepareMessages($prompt, $question, $history);
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $messages = $this->prepareMessages($prompt, $question, $history);
        return $this->getChatModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The result consists of as much history as fits, followed by the system
     * prompt and finally the user question.
     *
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages($prompt, $question, $history)
    {
        // calculate the space for context
        $remainingContext = $this->getChatModel()->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);
        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
        $remainingContext -= $safetyMargin;
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The newest
     * exchanges are preferred; the result is in chronological order.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int|float $tokenLimit Available tokens (fractional once the safety margin was subtracted)
     * @return array
     */
    protected function historyMessages($history, $tokenLimit)
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk from the newest exchange backwards until the limit is exhausted
        $history = array_reverse($history);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added answer first, because the whole list is reversed again below
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Placeholders have the form {{NAME}} with the variable name in upper case.
     * The 'language' variable is always set from the language prompt.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            if (isset($isoLangnames[$conf['lang']])) {
                $languagePrompt = 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
                return $languagePrompt;
            }
        }

        $languagePrompt = 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
        return $languagePrompt;
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data; empty when the file is missing, unreadable or holds no JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        // a failed read yields false and a broken file decodes to null - callers always expect an array
        $data = json_decode((string) file_get_contents($this->runDataFile), true);
        return is_array($data) ? $data : [];
    }
}