<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\ModelFactory;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point of the plugin: hands out the configured models, the
 * embeddings and storage objects, builds prompts and asks questions.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory */
    public $factory;

    /** @var CLIPlugin|null optional CLI plugin used for logging, see setLogger() */
    protected $logger;

    /** @var Embeddings|null lazily created by getEmbeddings() */
    protected $embeddings;

    /** @var AbstractStorage|null lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file that stores meta data on the last run */
    protected $runDataFile;


    /**
     * Constructor. Initializes vendor autoloader
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the Embeddings and Storage objects when they
     * are created, so it has to be set before those are first accessed.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when there is no auth backend or when the
     * 'restrict' option is empty. Otherwise the current user has to match
     * the 'restrict' option.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            $USERINFO['grps']
        );
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the model used for rephrasing questions
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The object is created on first access and reused afterwards.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class name is derived from the 'storage' config option.
     * The object is created on first access and reused afterwards.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When there is a history and the 'rephraseHistory' option is greater
     * than zero, the question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Chunks similar to the question are looked up and passed as context in
     * the 'question' prompt. When nothing similar is found, the 'noanswer'
     * prompt is used instead and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'question' => $question,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer', [
                'question' => $question,
            ]);
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(),
            $prompt,
            $history,
            // config values are not guaranteed to be integers, the parameter is typed
            (int) $this->getConf('chatHistory')
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase', [
            'question' => $question,
        ]);
        $messages = $this->prepareMessages(
            $this->getRephraseModel(),
            $prompt,
            $history,
            // config values are not guaranteed to be integers, the parameter is typed
            (int) $this->getConf('rephraseHistory')
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The history is placed in front of the prompted question. It may only use
     * the tokens that remain after the question and a safety margin have been
     * subtracted from the model's maximum input length.
     *
     * @param ChatInterface $model The used model
     * @param string $promptedQuestion The user question embedded in a prompt
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $promptedQuestion,
        array $history,
        int $historySize
    ): array {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($promptedQuestion);
        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
        $remainingContext -= $safetyMargin;
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        // The margin calculation yields a float. Truncate explicitly, because passing
        // a fractional float to the int parameter is deprecated since PHP 8.1.
        $messages = $this->historyMessages($history, (int) $remainingContext, $historySize);
        $messages[] = [
            'role' => 'user',
            'content' => $promptedQuestion
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The newest
     * history rows are considered first; the result is in chronological order
     * again.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of history rows to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk from the newest to the oldest row, looking at $sizeLimit rows at most
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse order (answer first), the final array_reverse() fixes that
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the upper cased {{KEY}} placeholder in the
     * template. The 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN($type, 'prompt'));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        // enforce the UI language if configured and if we know its name
        if (
            $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL &&
            isset($isoLangnames[$conf['lang']])
        ) {
            return 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
        }

        // otherwise follow the user's language, the UI language is only the fallback
        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';
        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, empty when there is none or it can not be read
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode() returns null for empty or broken files, callers expect an array
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}