<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\AbstractChatModel;
use dokuwiki\plugin\aichat\Model\AbstractEmbeddingModel;
use dokuwiki\plugin\aichat\Model\OpenAI\EmbeddingAda02;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;
use dokuwiki\plugin\aichat\Storage\ChromaStorage;
use dokuwiki\plugin\aichat\Storage\PineconeStorage;
use dokuwiki\plugin\aichat\Storage\QdrantStorage;
use dokuwiki\plugin\aichat\Storage\SQLiteStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Central access point of the plugin: lazily creates the chat model, the embedding model,
 * the vector storage and the Embeddings interface, builds the prompts and keeps the
 * meta data of the last run.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin $logger optional logger, handed on to the embeddings and the storage */
    protected $logger;
    /** @var AbstractChatModel lazily created by getChatModel() */
    protected $chatModel;
    /** @var AbstractEmbeddingModel lazily created by getEmbedModel() */
    protected $embedModel;
    /** @var Embeddings lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage lazily created by getStorage() */
    protected $storage;

    /** @var string path of the file where meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also determines the location of the run data file inside the configured meta directory.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * Only components created after this call receive the logger.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is available or when the 'restrict' option is
     * empty. Otherwise the current user is matched against the 'restrict' setting.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;

        $restrict = $this->getConf('restrict');
        if (!$restrict) return true;
        if (!isset($USERINFO)) return false;

        // fall back to an empty group list when the user info carries no groups
        $groups = $USERINFO['grps'] ?? [];

        return auth_isMember($restrict, $INPUT->server->str('REMOTE_USER'), $groups);
    }

    /**
     * Access the Chat Model
     *
     * The class is looked up below the plugin's Model namespace using the 'model' option.
     *
     * @return AbstractChatModel
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getChatModel()
    {
        if ($this->chatModel instanceof AbstractChatModel) {
            return $this->chatModel;
        }

        $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

        if (!class_exists($class)) {
            throw new \RuntimeException('Configured model not found: ' . $class);
        }
        // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
        $this->chatModel = new $class([
            'key' => $this->getConf('openaikey'),
            'org' => $this->getConf('openaiorg')
        ]);

        return $this->chatModel;
    }

    /**
     * Access the Embedding Model
     *
     * @return AbstractEmbeddingModel
     */
    public function getEmbedModel()
    {
        // FIXME this is hardcoded to OpenAI for now
        if ($this->embedModel instanceof AbstractEmbeddingModel) {
            return $this->embedModel;
        }

        $this->embedModel = new EmbeddingAda02([
            'key' => $this->getConf('openaikey'),
            'org' => $this->getConf('openaiorg')
        ]);

        return $this->embedModel;
    }

    /**
     * Access the Embeddings interface
     *
     * Built from the chat model, the embedding model and the storage of this helper.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if ($this->embeddings instanceof Embeddings) {
            return $this->embeddings;
        }

        $this->embeddings = new Embeddings($this->getChatModel(), $this->getEmbedModel(), $this->getStorage());
        if ($this->logger) {
            $this->embeddings->setLogger($this->logger);
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The first configured backend wins, checked in this order: Pinecone, Chroma, Qdrant.
     * Without any of these options set, the SQLite storage is used.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if ($this->storage instanceof AbstractStorage) {
            return $this->storage;
        }

        if ($this->getConf('pinecone_apikey')) {
            $this->storage = new PineconeStorage();
        } elseif ($this->getConf('chroma_baseurl')) {
            $this->storage = new ChromaStorage();
        } elseif ($this->getConf('qdrant_baseurl')) {
            $this->storage = new QdrantStorage();
        } else {
            $this->storage = new SQLiteStorage();
        }

        if ($this->logger) {
            $this->storage->setLogger($this->logger);
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question and the
     * last history entry is passed on as the previous exchange.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
            $prev = end($history);
        } else {
            $standaloneQuestion = $question;
            $prev = [];
        }
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Similar chunks are looked up and passed as context in the system prompt. Without any
     * similar chunk the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer') . ' ' . $this->getLanguagePrompt();
        }

        // the previous exchange (if any) precedes the system prompt and the current question
        $messages = [];
        if ($previous) {
            $messages[] = [
                'role' => 'user',
                'content' => $previous[0]
            ];
            $messages[] = [
                'role' => 'assistant',
                'content' => $previous[1]
            ];
        }
        $messages[] = [
            'role' => 'system',
            'content' => $prompt
        ];
        $messages[] = [
            'role' => 'user',
            'content' => $question
        ];

        $answer = $this->getChatModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * The history is added newest first; an exchange is only included when the resulting
     * history still fits into the chat model's rephrasing token limit.
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // go back in history as far as possible without hitting the token limit
        $chatHistory = '';
        if ($history) {
            $encoder = $this->getEmbeddings()->getTokenEncoder();
            $maxTokens = $this->getChatModel()->getMaxRephrasingTokenLength();

            foreach (array_reverse($history) as $row) {
                $candidate =
                    "Human: " . $row[0] . "\n" .
                    "Assistant: " . $row[1] . "\n" .
                    $chatHistory;

                // check the history including this row, so the limit is never exceeded
                if (count($encoder->encode($candidate)) > $maxTokens) {
                    break;
                }

                $chatHistory = $candidate;
            }
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getChatModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Each variable replaces its upper cased name in double curly braces, e.g. the
     * 'context' variable replaces {{CONTEXT}}.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * When the 'preferUIlanguage' option is above AIChat::LANG_AUTO_ALL and the wiki language
     * is listed in lang/languages.php, that language is enforced. Otherwise the model is told
     * to use the user's language, with the wiki language (or English) as fallback.
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $langCode = $conf['lang'];

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL && isset($isoLangnames[$langCode])) {
            return 'Always answer in ' . $isoLangnames[$langCode] . '.';
        }

        $currentLang = $isoLangnames[$langCode] ?? 'English';

        return 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        }

        return '';
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data; empty when the file is missing, unreadable or not a JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode yields null on broken input; always hand back an array as documented
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}