<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\ModelFactory;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Gives access to the configured models, the embeddings and the storage and
 * implements the question/answer flow including chat history handling.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory */
    public $factory;

    /** @var CLIPlugin|null logger to pass on to embeddings and storage, if one was set */
    protected $logger;

    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;

    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /** @var string path of the file where meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader, the run data file location
     * and the model factory
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Update the configuration
     *
     * The given values are merged over the current plugin configuration and
     * are passed on to the model factory as well.
     *
     * @param array $config
     * @return void
     */
    public function updateConfig(array $config)
    {
        $this->conf = array_merge($this->conf, $config);
        $this->factory->updateConfig($config);
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            // user info without group data must not trigger a warning or pass null as group list
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The instance is created on first use and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is picked by the 'storage' config setting. The
     * instance is created on first use and reused afterwards.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given and rephrasing is enabled, the question is
     * rephrased into a standalone question which is then used for the
     * context search.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param string $sourcePage The page the question was asked on
     * @return array ['question' => $question, 'contextQuestion' => $contextQuestion,
     *                'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [], $sourcePage = '')
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $contextQuestion = $this->rephraseChatQuestion($question, $history);

            // Only use the rephrased question if it has more history than the chat history provides
            if ($this->getConf('rephraseHistory') > $this->getConf('chatHistory')) {
                $question = $contextQuestion;
            }
        } else {
            $contextQuestion = $question;
        }

        return $this->askQuestion($question, $history, $contextQuestion, $sourcePage);
    }

    /**
     * Ask a single standalone question
     *
     * @param string $question The question to ask
     * @param array $history [user, ai] of the previous question
     * @param string $contextQuestion The question to use for context search
     * @param string $sourcePage The page the question was asked on
     * @return array ['question' => $question, 'contextQuestion' => $contextQuestion,
     *                'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [], $contextQuestion = '', $sourcePage = '')
    {
        if ($sourcePage) {
            // only the current page is context
            $similar = $this->getEmbeddings()->getPageChunks($sourcePage);
        } else {
            // fall back to the question itself when no dedicated context question was given
            $searchQuery = $contextQuestion ?: $question;

            if ($this->getConf('fullpagecontext')) {
                // match chunks but use full pages as context
                $similar = $this->getEmbeddings()->getSimilarPages($searchQuery, $this->getLanguageLimit());
            } else {
                // use the chunks as context
                $similar = $this->getEmbeddings()->getSimilarChunks($searchQuery, $this->getLanguageLimit());
            }
        }

        if ($similar) {
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'question' => $question,
                'customprompt' => $this->getConf('customprompt'),
            ]);
        } else {
            // no context was found: use the dedicated prompt and do not send any history
            $prompt = $this->getPrompt('noanswer', [
                'question' => $question,
            ]);
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(),
            $prompt,
            $history,
            $this->getConf('chatHistory')
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'contextQuestion' => $contextQuestion,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase', [
            'question' => $question,
        ]);
        $messages = $this->prepareMessages(
            $this->getRephraseModel(),
            $prompt,
            $history,
            $this->getConf('rephraseHistory')
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The history is placed in front of the prompted question. When the model
     * reports a maximum input length, only as much history is added as fits
     * next to the prompt (minus a 5% safety margin). When the prompt alone
     * already uses up that length, no history is added at all.
     *
     * @param ChatInterface $model The used model
     * @param string $promptedQuestion The user question embedded in a prompt
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $promptedQuestion,
        array $history,
        int $historySize
    ): array {
        $maxInputTokens = (int) $model->getMaxInputTokenLength(); // might be 0

        if ($maxInputTokens <= 0) {
            // no usable limit: a negative token limit disables the check in historyMessages()
            $messages = $this->historyMessages($history, -1, $historySize);
        } else {
            // calculate the space that is left for the history
            $remainingContext = $maxInputTokens - $this->countTokens($promptedQuestion);

            if ($remainingContext > 0) {
                // 5% safety margin; the explicit cast avoids handing a fractional float to an int parameter
                $remainingContext = (int) ($remainingContext - $remainingContext * 0.05);
            }

            // without any room left, the history has to be dropped instead of being sent unchecked
            $messages = $remainingContext > 0
                ? $this->historyMessages($history, $remainingContext, $historySize)
                : [];
        }

        $messages[] = [
            'role' => 'user',
            'content' => $promptedQuestion
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The newest
     * history entries are preferred, the result is in chronological order.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use, a limit of zero or less disables this check
     * @param int $sizeLimit The maximum number of history entries to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk from the newest entry backwards so the most recent exchanges are kept
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);

            if ($tokenLimit > 0 && $length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse (answer before question), the final array_reverse restores the order
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the upper cased placeholder of the same name,
     * eg. 'question' replaces {{QUESTION}}. The 'language' variable is always
     * set by this method.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN($type, 'prompt'));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $uiLanguageKnown = isset($isoLangnames[$conf['lang']]);

        if ($uiLanguageKnown && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
        }

        // fall back to English when the UI language has no known name
        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, an empty array if there is none or it can not be read
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // a broken or empty file decodes to null, callers are promised an array
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}