<?php

use dokuwiki\Extension\Plugin;
use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\AbstractModel;
use dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;
use dokuwiki\plugin\aichat\Storage\PineconeStorage;
use dokuwiki\plugin\aichat\Storage\SQLiteStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Lazily creates and caches the model, storage and embeddings objects and
 * offers the question/answer entry points built on top of them.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin|null $logger Optional logger handed on to storage and embeddings */
    protected $logger;
    /** @var AbstractModel|null Cached model instance, created on first getModel() call */
    protected $model;
    /** @var Embeddings|null Cached embeddings instance, created on first getEmbeddings() call */
    protected $embeddings;
    /** @var AbstractStorage|null Cached storage instance, created on first getStorage() call */
    protected $storage;

    /**
     * Constructor. Initializes vendor autoloader
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is only passed on to storage/embeddings objects created after this call.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict'
     * setting is empty. Otherwise the current user has to match the configured
     * member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // a missing or null group list must not be passed on as is
        $groups = (array)($USERINFO['grps'] ?? []);

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the configured language model
     *
     * The class is looked up below the plugin's Model namespace using the
     * 'model' setting and instantiated once.
     *
     * @return AbstractModel
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getModel()
    {
        if (!$this->model instanceof AbstractModel) {
            $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

            if (!class_exists($class)) {
                throw new \RuntimeException('Configured model not found: ' . $class);
            }

            // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
            $this->model = new $class([
                'key' => $this->getConf('openaikey'),
                'org' => $this->getConf('openaiorg')
            ]);
        }

        return $this->model;
    }

    /**
     * Access the Embeddings interface
     *
     * Created on first use from the model and the storage; receives the logger if one is set.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if (!$this->embeddings instanceof Embeddings) {
            $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
            if ($this->logger) {
                $this->embeddings->setLogger($this->logger);
            }
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * Pinecone is used when a 'pinecone_apikey' is configured, SQLite otherwise.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if (!$this->storage instanceof AbstractStorage) {
            if ($this->getConf('pinecone_apikey')) {
                $this->storage = new PineconeStorage();
            } else {
                $this->storage = new SQLiteStorage();
            }

            if ($this->logger) {
                $this->storage->setLogger($this->logger);
            }
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question
     * and the most recent exchange is passed along as conversational context.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if (!$history) {
            return $this->askQuestion($question, []);
        }

        $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        $prev = end($history);

        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and embedded into the system prompt. When
     * nothing similar is found the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());

        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode("\n", array_map(static function (Chunk $chunk) {
                return "\n```\n" . $chunk->getText() . "\n```\n";
            }, $similar));
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
        }

        // resulting order: [previous user, previous assistant,] system, user
        $messages = [];
        if ($previous) {
            $messages[] = [
                'role' => 'user',
                'content' => $previous[0]
            ];
            $messages[] = [
                'role' => 'assistant',
                'content' => $previous[1]
            ];
        }
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];

        $answer = $this->getModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // resolved once up front, both are only read inside the loop
        $encoder = $this->getEmbeddings()->getTokenEncoder();
        $maxTokens = $this->getModel()->getMaxRephrasingTokenLength();

        // go back in history as far as possible without hitting the token limit
        // the limit is checked before a row is added, so the row that crosses it is still included
        $chatHistory = '';
        foreach (array_reverse($history) as $row) {
            if (count($encoder->encode($chatHistory)) > $maxTokens) {
                break;
            }

            // older rows are prepended to keep the chronological order
            $chatHistory =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces its upper cased {{NAME}} placeholder in the template.
     *
     * @param string $type Template name, loaded via localFN() as 'prompt_<type>'
     * @param string[] $vars Placeholder name => replacement value
     * @return string
     * @throws \RuntimeException when the template file can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $file = $this->localFN('prompt_' . $type);
        $template = is_readable($file) ? file_get_contents($file) : false;
        if ($template === false) {
            // an unreadable template would otherwise silently become an empty prompt
            throw new \RuntimeException('Failed to load prompt template: ' . $file);
        }

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * The wiki's configured language is requested when the 'preferUIlanguage'
     * setting is above AIChat::LANG_AUTO_ALL and a name for that language is
     * known. In all other cases the model is told to use the user's language.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            $isoLangnames = include(__DIR__ . '/lang/languages.php');
            if (isset($isoLangnames[$conf['lang']])) {
                return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
            }
        }

        return 'Always answer in the user\'s language.';
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        global $conf;

        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            return $conf['lang'];
        }

        return '';
    }
}