<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\ChatInterface;
use dokuwiki\plugin\aichat\Model\EmbeddingInterface;
use dokuwiki\plugin\aichat\Model\OpenAI\Embedding3Small;
use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;
use dokuwiki\plugin\aichat\Storage\ChromaStorage;
use dokuwiki\plugin\aichat\Storage\PineconeStorage;
use dokuwiki\plugin\aichat\Storage\QdrantStorage;
use dokuwiki\plugin\aichat\Storage\SQLiteStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point of the plugin: lazily creates the chat model, the
 * embedding model, the vector storage and the Embeddings service, and builds
 * the message lists that are sent to the chat model.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin $logger */
    protected $logger;
    /** @var ChatInterface */
    protected $chatModel;
    /** @var EmbeddingInterface */
    protected $embedModel;
    /** @var Embeddings */
    protected $embeddings;
    /** @var AbstractStorage */
    protected $storage;

    /** @var string path of the JSON file holding meta data on the last run */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also derives the run data file location from the wiki's meta directory
     * and loads the plugin configuration.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
        $this->loadConfig();
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is handed on to the storage and embeddings objects when they
     * are created, so it has to be set before those are first accessed.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the
     * 'restrict' option is empty. Otherwise the current user has to match the
     * configured member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) {
            return true;
        }
        if (!$this->getConf('restrict')) {
            return true;
        }
        if (!isset($USERINFO)) {
            return false;
        }

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            // user info without a group list must not pass null as the groups argument
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the Chat Model
     *
     * The model class is looked up below the plugin's Model namespace using
     * the 'model' configuration option. The instance is created once and reused.
     *
     * @return ChatInterface
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof ChatInterface) {
            return $this->chatModel;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

        //$class = Claude3Haiku::class;

        if (!class_exists($class)) {
            throw new \RuntimeException('Configured model not found: ' . $class);
        }

        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
        $this->chatModel = new $class($this->conf);

        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * The instance is created once and reused.
     *
     * @return EmbeddingInterface
     */
    public function getEmbedModel()
    {
        // FIXME this is hardcoded to OpenAI for now
        if ($this->embedModel instanceof EmbeddingInterface) {
            return $this->embedModel;
        }

        //$this->embedModel = new Embedding3Small($this->conf);
        $this->embedModel = new EmbeddingAda02($this->conf);

        return $this->embedModel;
    }

    /**
     * Access the Embeddings interface
     *
     * Wires chat model, embedding model, storage and plugin configuration
     * together. The instance is created once and reused; a logger set via
     * setLogger() before the first call is passed on.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings(
            $this->getChatModel(),
            $this->getEmbedModel(),
            $this->getStorage(),
            $this->conf
        );
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is chosen by the first configured option in this order:
     * 'pinecone_apikey', 'chroma_baseurl', 'qdrant_baseurl'. Without any of
     * them the SQLite storage is used. The instance is created once and reused.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        if ($this->getConf('pinecone_apikey')) {
            $this->storage = new PineconeStorage();
        } elseif ($this->getConf('chroma_baseurl')) {
            $this->storage = new ChromaStorage();
        } elseif ($this->getConf('qdrant_baseurl')) {
            $this->storage = new QdrantStorage();
        } else {
            $this->storage = new SQLiteStorage();
        }

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * When a history is given, the question is first rephrased into a
     * standalone question; that rephrased question is then asked.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
        } else {
            $standaloneQuestion = $question;
        }
        return $this->askQuestion($standaloneQuestion, $history);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and passed as context in the 'question'
     * prompt. When no similar chunks are found the 'noanswer' prompt is used
     * and the history is dropped.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $history = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer');
            $history = [];
        }

        $messages = $this->prepareMessages($prompt, $question, $history);
        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        $prompt = $this->getPrompt('rephrase');
        $messages = $this->prepareMessages($prompt, $question, $history);
        return $this->getChatModel()->getAnswer($messages);
    }

    /**
     * Prepare the messages for the AI
     *
     * The resulting list contains as much history as fits, followed by the
     * system prompt and finally the user question.
     *
     * @param string $prompt The fully prepared system prompt
     * @param string $question The user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array An OpenAI compatible array of messages
     */
    protected function prepareMessages($prompt, $question, $history)
    {
        // calculate the space for context
        $remainingContext = $this->getChatModel()->getMaxInputTokenLength();
        $remainingContext -= $this->countTokens($prompt);
        $remainingContext -= $this->countTokens($question);
        $safetyMargin = $remainingContext * 0.05; // 5% safety margin
        $remainingContext -= $safetyMargin;
        // FIXME we may want to also have an upper limit for the history and not always use the full context

        $messages = $this->historyMessages($history, $remainingContext);
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];
        return $messages;
    }

    /**
     * Create an array of OpenAI compatible messages from the given history
     *
     * Only as many messages are used as fit into the token limit. The history
     * is walked from the newest exchange backwards and the walk stops at the
     * first exchange that no longer fits. The result is in chronological order.
     *
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @param int|float $tokenLimit
     * @return array
     */
    protected function historyMessages($history, $tokenLimit)
    {
        $remainingContext = $tokenLimit;

        $messages = [];
        $history = array_reverse($history);
        foreach ($history as $row) {
            $length = $this->countTokens($row[0] . $row[1]);
            if ($length > $remainingContext) {
                break;
            }
            $remainingContext -= $length;

            // pushed answer-first because the whole list is reversed below
            $messages[] = [
                'role' => 'assistant',
                'content' => $row[1]
            ];
            $messages[] = [
                'role' => 'user',
                'content' => $row[0]
            ];
        }
        return array_reverse($messages);
    }

    /**
     * Get an approximation of the token count for the given text
     *
     * @param string $text
     * @return int
     */
    protected function countTokens($text)
    {
        return count($this->getEmbeddings()->getTokenEncoder()->encode($text));
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces its upper-cased {{NAME}} placeholder. The
     * 'language' variable is always set from getLanguagePrompt().
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the prompt template cannot be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $file = $this->localFN('prompt_' . $type);
        $template = file_get_contents($file);
        if ($template === false) {
            // an empty system prompt would silently produce useless answers
            throw new \RuntimeException('Prompt template could not be read: ' . $file);
        }
        $vars['language'] = $this->getLanguagePrompt();

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When 'preferUIlanguage' is above AIChat::LANG_AUTO_ALL and the wiki
     * language has a known name, that language is enforced. Otherwise the
     * model is told to follow the user's language, with the wiki language
     * (or English if unknown) as the fallback.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $langName = $isoLangnames[$conf['lang']] ?? null;

        if ($langName !== null && $this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            return 'Always answer in ' . $langName . '.';
        }

        $currentLang = $langName ?? 'English';

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data; empty when the file is missing, unreadable or not a JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode() yields null for empty or corrupt content; callers expect an array
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}