<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\AbstractModel;
use dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;
use dokuwiki\plugin\aichat\Storage\ChromaStorage;
use dokuwiki\plugin\aichat\Storage\PineconeStorage;
use dokuwiki\plugin\aichat\Storage\QdrantStorage;
use dokuwiki\plugin\aichat\Storage\SQLiteStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Lazily creates the model, embeddings and storage objects used by the plugin,
 * builds the prompts sent to the model and persists meta data about the last run.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin $logger */
    protected $logger;
    /** @var AbstractModel */
    protected $model;
    /** @var Embeddings */
    protected $embeddings;
    /** @var AbstractStorage */
    protected $storage;

    /** @var string path of the JSON file holding meta data on the last run */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader and the run data file location
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the embeddings and storage objects when they
     * are created, so it has to be set before those are first accessed.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict'
     * option is empty. Otherwise the current user has to match the 'restrict' list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            // user info without a group list must not trigger a warning or hand null to auth_isMember()
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the configured model
     *
     * The class is looked up below \dokuwiki\plugin\aichat\Model\ using the 'model'
     * option and is instantiated once; later calls return the same instance.
     *
     * @return GPT35Turbo
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getModel()
    {
        if (!$this->model instanceof AbstractModel) {
            $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

            if (!class_exists($class)) {
                throw new \RuntimeException('Configured model not found: ' . $class);
            }
            // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
            $this->model = new $class([
                'key' => $this->getConf('openaikey'),
                'org' => $this->getConf('openaiorg')
            ]);
        }

        return $this->model;
    }

    /**
     * Access the Embeddings interface
     *
     * Created on first use from the model and the storage; receives the logger if one was set.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if (!$this->embeddings instanceof Embeddings) {
            $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
            if ($this->logger) {
                $this->embeddings->setLogger($this->logger);
            }
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first option that is set, in this order:
     * 'pinecone_apikey', 'chroma_baseurl', 'qdrant_baseurl'. Without any of them
     * the SQLite storage is used. The logger is passed on if one was set.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if (!$this->storage instanceof AbstractStorage) {
            if ($this->getConf('pinecone_apikey')) {
                $this->storage = new PineconeStorage();
            } elseif ($this->getConf('chroma_baseurl')) {
                $this->storage = new ChromaStorage();
            } elseif ($this->getConf('qdrant_baseurl')) {
                $this->storage = new QdrantStorage();
            } else {
                $this->storage = new SQLiteStorage();
            }

            if ($this->logger) {
                $this->storage->setLogger($this->logger);
            }
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history the question is first rephrased into a standalone question and
     * the most recent exchange is passed along as the previous question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
            $prev = end($history);
        } else {
            $standaloneQuestion = $question;
            $prev = [];
        }
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and embedded into the 'question' prompt; when
     * none are found the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
        }

        $messages = [
            [
                'role' => 'system',
                'content' => $prompt
            ],
            [
                'role' => 'user',
                'content' => $question
            ]
        ];

        if ($previous) {
            // prepend the previous exchange: resulting order is user, assistant, system, user
            array_unshift($messages, [
                'role' => 'assistant',
                'content' => $previous[1]
            ]);
            array_unshift($messages, [
                'role' => 'user',
                'content' => $previous[0]
            ]);
        }

        $answer = $this->getModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // go back in history as far as possible without hitting the token limit
        $chatHistory = '';
        $history = array_reverse($history);
        foreach ($history as $row) {
            // the limit is checked before a row is added, so the row that crosses it is still included
            if (
                count($this->getEmbeddings()->getTokenEncoder()->encode($chatHistory)) >
                $this->getModel()->getMaxRephrasingTokenLength()
            ) {
                break;
            }

            // older rows are prepended to keep the transcript in chronological order
            $chatHistory =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the placeholder made of its upper cased name in
     * double curly braces, e.g. 'context' fills {{CONTEXT}}.
     *
     * @param string $type name of the template, loaded via localFN('prompt_' . $type)
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When 'preferUIlanguage' is above AIChat::LANG_AUTO_ALL and the wiki language has
     * an entry in lang/languages.php, that language is requested. In all other cases
     * the model is told to use the user's language.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            $isoLangnames = include(__DIR__ . '/lang/languages.php');
            if (isset($isoLangnames[$conf['lang']])) {
                $languagePrompt = 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
                return $languagePrompt;
            }
        }

        $languagePrompt = 'Always answer in the user\'s language.';
        return $languagePrompt;
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array the stored data; empty when the file is missing, unreadable or not a JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $raw = file_get_contents($this->runDataFile);
        if ($raw === false) {
            return [];
        }

        // json_decode() yields null for empty or broken JSON; callers are promised an array
        $data = json_decode($raw, true);
        return is_array($data) ? $data : [];
    }
}