<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\AbstractChatModel;
use dokuwiki\plugin\aichat\Model\AbstractEmbeddingModel;
use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;
use dokuwiki\plugin\aichat\Storage\ChromaStorage;
use dokuwiki\plugin\aichat\Storage\PineconeStorage;
use dokuwiki\plugin\aichat\Storage\QdrantStorage;
use dokuwiki\plugin\aichat\Storage\SQLiteStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point for the chat model, the embedding model, the vector
 * storage and the Embeddings interface. All of them are created lazily on
 * first access and then cached on this object.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin $logger */
    protected $logger;
    /** @var AbstractChatModel */
    protected $chatModel;
    /** @var AbstractEmbeddingModel */
    protected $embedModel;
    /** @var Embeddings */
    protected $embeddings;
    /** @var AbstractStorage */
    protected $storage;

    /** @var string path of the JSON file holding meta data on the last run */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader and the run data file location
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is only handed on to Embeddings and Storage objects that are
     * created after this call.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict'
     * option is empty. Otherwise the current user has to match the restriction.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            // user info without group data is treated as "member of no group"
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the Chat Model
     *
     * The class is looked up below the plugin's Model namespace using the 'model' option.
     *
     * @return AbstractChatModel
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof AbstractChatModel) {
            return $this->chatModel;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

        if (!class_exists($class)) {
            throw new \RuntimeException('Configured model not found: ' . $class);
        }
        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
        $this->chatModel = new $class([
            'key' => $this->getConf('openaikey'),
            'org' => $this->getConf('openaiorg')
        ]);

        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * @return AbstractEmbeddingModel
     */
    public function getEmbedModel()
    {
        // FIXME this is hardcoded to OpenAI for now
        if ($this->embedModel instanceof AbstractEmbeddingModel) {
            return $this->embedModel;
        }

        $this->embedModel = new EmbeddingAda02([
            'key' => $this->getConf('openaikey'),
            'org' => $this->getConf('openaiorg')
        ]);

        return $this->embedModel;
    }

    /**
     * Access the Embeddings interface
     *
     * Built from the chat model, the embedding model and the storage of this helper.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings($this->getChatModel(), $this->getEmbedModel(), $this->getStorage());
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first configured option in this order:
     * 'pinecone_apikey', 'chroma_baseurl', 'qdrant_baseurl'. When none of them
     * is set, SQLite is used.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        if ($this->getConf('pinecone_apikey')) {
            $this->storage = new PineconeStorage();
        } elseif ($this->getConf('chroma_baseurl')) {
            $this->storage = new ChromaStorage();
        } elseif ($this->getConf('qdrant_baseurl')) {
            $this->storage = new QdrantStorage();
        } else {
            $this->storage = new SQLiteStorage();
        }

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question
     * and the most recent exchange is passed along as conversational context.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
            $prev = end($history);
        } else {
            $standaloneQuestion = $question;
            $prev = [];
        }
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up for the question and passed to the 'question'
     * prompt as context. Without any similar chunk the 'noanswer' prompt is used.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer') . ' ' . $this->getLanguagePrompt();
        }

        $messages = [
            [
                'role' => 'system',
                'content' => $prompt
            ],
            [
                'role' => 'user',
                'content' => $question
            ]
        ];

        if ($previous) {
            // the previous exchange is prepended, so the resulting order is:
            // previous user message, previous assistant message, system prompt, question
            array_unshift($messages, [
                'role' => 'assistant',
                'content' => $previous[1]
            ]);
            array_unshift($messages, [
                'role' => 'user',
                'content' => $previous[0]
            ]);
        }

        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // go back in history, newest exchange first. The token limit is checked
        // before a row is added, so collecting stops once the text gathered so
        // far is above the limit
        $chatHistory = '';
        $history = array_reverse($history);
        foreach ($history as $row) {
            if (
                count($this->getEmbeddings()->getTokenEncoder()->encode($chatHistory)) >
                $this->getChatModel()->getMaxRephrasingTokenLength()
            ) {
                break;
            }

            // older rows are put in front to keep the chronological order
            $chatHistory =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;
        }

        // ask the chat model to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getChatModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * The template is read from the file returned by localFN('prompt_' . $type).
     * Each variable replaces its upper cased {{NAME}} placeholder.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the
     * wiki language has a known name, that language is enforced. Otherwise the
     * model is told to follow the user's language, with the wiki language
     * (or English when its name is unknown) as fallback.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            if (isset($isoLangnames[$conf['lang']])) {
                $languagePrompt = 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
                return $languagePrompt;
            }
        }

        // the trailing space keeps the two sentences apart
        $languagePrompt = 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
        return $languagePrompt;
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data. Empty when the file is missing, unreadable or has no valid JSON array
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode() returns null for empty or broken content, callers expect an array
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}