<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Gives access to the configured chat, rephrase and embedding models, the storage
 * backend and the embeddings handler, and implements the question/answer flow on
 * top of them.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin $logger */
    protected $logger;
    /** @var ChatInterface */
    protected $chatModel;
    /** @var ChatInterface */
    protected $rephraseModel;
    /** @var EmbeddingInterface */
    protected $embedModel;
    /** @var Embeddings */
    protected $embeddings;
    /** @var AbstractStorage */
    protected $storage;

    /** @var string path of the file where meta data on the last run is stored */
    protected $runDataFile;


    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also sets up the path of the run data file and loads the plugin configuration.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the embeddings and storage objects when they are
     * created, so it needs to be set before those are first accessed.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when there is no auth backend or no 'restrict' setting. It is
     * denied when no user info is available. Otherwise the decision is left to
     * auth_isMember().
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) {
            return true;
        }
        if (!$this->getConf('restrict')) {
            return true;
        }
        if (!isset($USERINFO)) {
            return false;
        }

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            $USERINFO['grps']
        );
    }

    /**
     * Instantiate the model class selected by a configuration option
     *
     * The option value is split at the first space into a namespace and a model name.
     * The namespace selects the class \dokuwiki\plugin\aichat\Model\<namespace>\<type>,
     * the model name is passed to its constructor together with the plugin configuration.
     *
     * @param string $option The name of the configuration option to read
     * @param string $type The class name within the namespace ('ChatModel' or 'EmbeddingModel')
     * @return object The newly created model
     * @throws \RuntimeException when no matching class exists
     */
    protected function loadModel(string $option, string $type)
    {
        [$namespace, $name] = sexplode(' ', $this->getConf($option), 2);
        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\' . $type;

        if (!class_exists($class)) {
            throw new \RuntimeException('No ' . $type . ' found for ' . $namespace);
        }

        return new $class($name, $this->conf);
    }

    /**
     * Access the Chat Model
     *
     * The model is created on first access based on the 'chatmodel' setting and reused afterwards.
     *
     * @return ChatInterface
     * @throws \RuntimeException when no matching model class exists
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }

        $this->chatModel = $this->loadModel('chatmodel', 'ChatModel');
        return $this->chatModel;
    }

    /**
     * Access the Chat Model used for rephrasing questions
     *
     * The model is created on first access based on the 'rephrasemodel' setting and reused afterwards.
     *
     * @return ChatInterface
     * @throws \RuntimeException when no matching model class exists
     */
    public function getRephraseModel()
    {
        if ($this->rephraseModel instanceof ChatInterface) {
            return $this->rephraseModel;
        }

        $this->rephraseModel = $this->loadModel('rephrasemodel', 'ChatModel');
        return $this->rephraseModel;
    }

    /**
     * Access the Embedding Model
     *
     * The model is created on first access based on the 'embedmodel' setting and reused afterwards.
     *
     * @return EmbeddingInterface
     * @throws \RuntimeException when no matching model class exists
     */
    public function getEmbedModel()
    {
        if ($this->embedModel instanceof EmbeddingInterface) {
            return $this->embedModel;
        }

        $this->embedModel = $this->loadModel('embedmodel', 'EmbeddingModel');
        return $this->embedModel;
    }


    /**
     * Access the Embeddings interface
     *
     * Created on first access from the chat model, the embedding model, the storage and
     * the plugin configuration. A previously set logger is passed on.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbedModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class is selected by the 'storage' setting and created on first access.
     * A previously set logger is passed on.
     *
     * @return AbstractStorage
     * @throws \RuntimeException when no matching storage class exists
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $type = $this->getConf('storage');
        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $type . 'Storage';

        // fail with a clear message on a bad setting, just like the model getters do
        if (!class_exists($class)) {
            throw new \RuntimeException('No Storage found for ' . $type);
        }

        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given and the 'rephraseHistory' setting is greater than zero, the
     * question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and passed as context in the 'question' prompt. When no
     * similar chunks are found, the 'noanswer' prompt is used and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(),
            $prompt,
            $question,
            $history,
            $this->getConf('chatHistory')
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $messages = $this->prepareMessages(
            $this->getRephraseModel(),
            $prompt,
            $question,
            $history,
            $this->getConf('rephraseHistory')
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The result consists of the history messages that fit into the remaining context,
     * followed by the system prompt and the user question.
     *
     * @param ChatInterface $model The used model
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $prompt,
        string $question,
        array $history,
        int $historySize
    ): array {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);
        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
        // the margin is a float, but historyMessages() expects an integer token limit.
        // Truncate explicitly instead of relying on the implicit float to int conversion
        $remainingContext = (int)($remainingContext - $safetyMargin);
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext, $historySize);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of messages to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history from the newest entry backwards, so the most recent ones are kept
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse order (answer first), the final array_reverse() fixes that
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the placeholder {{NAME}} (upper cased key) in the template.
     * The 'language' variable is always set from the language prompt.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        // enforce the UI language, but only when its name is known
        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            if (isset($isoLangnames[$conf['lang']])) {
                return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
            }
        }

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, empty when there is no run data file or it holds no valid JSON array
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        // an empty or damaged file decodes to null, callers are promised an array
        $data = json_decode((string)file_get_contents($this->runDataFile), true);
        return is_array($data) ? $data : [];
    }
}