<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\ModelFactory;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point for the plugin: hands out the configured models,
 * the embeddings and storage objects, builds the prompts and message lists
 * sent to the chat models and keeps the meta data of the last run.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory */
    public $factory;

    /** @var CLIPlugin $logger */
    protected $logger;

    /** @var Embeddings */
    protected $embeddings;
    /** @var AbstractStorage */
    protected $storage;

    /** @var string path of the JSON file where meta data on the last run is stored */
    protected $runDataFile;


    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also determines the location of the run data file, loads the plugin
     * configuration and creates the model factory from it.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is only handed on to Embeddings/Storage objects that are
     * created after this call (see getEmbeddings() and getStorage()).
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when no auth backend is available or when the
     * 'restrict' option is empty.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            $USERINFO['grps']
        );
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The object is created on first use and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The object is created on first use and reused afterwards. The class is
     * picked via the 'storage' configuration option.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When there is a history and the 'rephraseHistory' option is greater
     * than zero, the question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and passed as context in the 'question'
     * prompt. When no similar chunks are found, the 'noanswer' prompt is used
     * instead and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(),
            $prompt,
            $question,
            $history,
            $this->getConf('chatHistory')
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $messages = $this->prepareMessages(
            $this->getRephraseModel(),
            $prompt,
            $question,
            $history,
            $this->getConf('rephraseHistory')
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The resulting list contains the history messages that fit into the
     * remaining token budget, followed by the system prompt and finally the
     * user question.
     *
     * @param ChatInterface $model The used model
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $prompt,
        string $question,
        array $history,
        int $historySize
    ): array {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);
        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
        // The margin is a float; truncate explicitly because historyMessages() expects an
        // int and implicit conversion of fractional floats is deprecated since PHP 8.1
        $remainingContext = (int) ($remainingContext - $safetyMargin);
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext, $historySize);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The newest
     * history rows are preferred; the result is in chronological order again.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of messages to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history newest first, so the most recent rows win when space is short
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse (answer before question) because the whole list is flipped below
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Placeholders have the form {{NAME}} with the upper cased variable name.
     * The 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        // used as fallback language in the "user's language" prompt
        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        if (
            $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL &&
            isset($isoLangnames[$conf['lang']])
        ) {
            return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
        }

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data; empty when the file is missing, unreadable or not a JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        // json_decode() yields null for empty or corrupt content; callers are promised an array
        $data = json_decode((string) file_get_contents($this->runDataFile), true);
        return is_array($data) ? $data : [];
    }
}