<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\ModelFactory;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Gives access to the configured models, the embeddings and the storage backend and
 * implements the question/answer flow (optional rephrasing, context lookup, prompting).
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var ModelFactory */
    public $factory;

    /** @var CLIPlugin $logger */
    protected $logger;

    /** @var Embeddings */
    protected $embeddings;
    /** @var AbstractStorage */
    protected $storage;

    /** @var string path of the file where meta data on the last run is stored */
    protected $runDataFile;


    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also sets the location of the run data file, loads the plugin configuration
     * and creates the model factory from it.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
        $this->factory = new ModelFactory($this->conf);
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the Embeddings and Storage objects when they are created.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is always granted when there is no auth backend or no 'restrict' setting.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            // fall back to "no groups" if the user info carries no group list
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the Chat Model
     *
     * @return ChatInterface
     */
    public function getChatModel()
    {
        return $this->factory->getChatModel();
    }

    /**
     * Access the Rephrase Model
     *
     * @return ChatInterface
     */
    public function getRephraseModel()
    {
        return $this->factory->getRephraseModel();
    }

    /**
     * Access the Embedding Model
     *
     * @return EmbeddingInterface
     */
    public function getEmbeddingModel()
    {
        return $this->factory->getEmbeddingModel();
    }

    /**
     * Access the Embeddings interface
     *
     * The object is created on first use and then reused.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbeddingModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The storage class name is built from the 'storage' config setting. The object is
     * created on first use and then reused.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Storage\\' . $this->getConf('storage') . 'Storage';
        $this->storage = new $class($this->conf);

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * If there is a history and the 'rephraseHistory' setting is greater than zero, the
     * question is first rephrased into a standalone question.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history && $this->getConf('rephraseHistory') > 0) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and passed as context in the 'question' prompt. When no
     * similar chunks are found, the 'noanswer' prompt is used and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'question' => $question,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer', [
                'question' => $question,
            ]);
            $history = [];
        }

        $messages = $this->prepareMessages(
            $this->getChatModel(),
            $prompt,
            $history,
            (int) $this->getConf('chatHistory')
        );
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase', [
            'question' => $question,
        ]);
        $messages = $this->prepareMessages(
            $this->getRephraseModel(),
            $prompt,
            $history,
            (int) $this->getConf('rephraseHistory')
        );
        return $this->getRephraseModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The history is added in front of the prompted question, as far as it fits into
     * the model's input token limit.
     *
     * @param ChatInterface $model The used model
     * @param string $promptedQuestion The user question embedded in a prompt
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $historySize The maximum number of messages to use from the history
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages(
        ChatInterface $model,
        string $promptedQuestion,
        array $history,
        int $historySize
    ): array {
        // calculate the space for context
        $remainingContext = $model->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($promptedQuestion);
        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
        // the margin is fractional; truncate explicitly because historyMessages() expects
        // an int and implicit lossy float to int conversion is deprecated since PHP 8.1
        $remainingContext = (int) ($remainingContext - $safetyMargin);
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext, $historySize);
        $messages[] = [
            'role' => 'user',
            'content' => $promptedQuestion
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int $tokenLimit The maximum number of tokens to use
     * @param int $sizeLimit The maximum number of messages to use
     * @return array
     */
    protected function historyMessages(array $history, int $tokenLimit, int $sizeLimit): array
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        // walk the history from newest to oldest so the most recent exchanges are kept
        $history = array_reverse($history);
        $history = array_slice($history, 0, $sizeLimit);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // added in reverse (answer first), the final array_reverse restores user -> assistant order
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces the placeholder {{NAME}} (upper cased key) in the template.
     * The 'language' variable is always set from the language prompt.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN($type, 'prompt'));
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        // enforce the UI language only if it is configured and the language name is known
        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            if (isset($isoLangnames[$conf['lang']])) {
                $languagePrompt = 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
                return $languagePrompt;
            }
        }

        $languagePrompt = 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
        return $languagePrompt;
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data, empty if there is no (readable, valid) run data file
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode() returns null on invalid JSON and may return scalars; always hand back an array
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}