<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Extension\Plugin;
use dokuwiki\plugin\aichat\AIChat;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\Embeddings;
use dokuwiki\plugin\aichat\Model\AbstractModel;
use dokuwiki\plugin\aichat\Model\OpenAI\GPT35Turbo;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;
use dokuwiki\plugin\aichat\Storage\ChromaStorage;
use dokuwiki\plugin\aichat\Storage\PineconeStorage;
use dokuwiki\plugin\aichat\Storage\QdrantStorage;
use dokuwiki\plugin\aichat\Storage\SQLiteStorage;

/**
 * DokuWiki Plugin aichat (Helper Component)
 *
 * Lazily wires together the configured chat model, the embeddings interface and the
 * vector storage backend, and offers the question/answer entry points used by the
 * other plugin components.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class helper_plugin_aichat extends Plugin
{
    /** @var CLIPlugin $logger optional CLI plugin that is handed on to embeddings and storage */
    protected $logger;
    /** @var AbstractModel lazily created by getModel() */
    protected $model;
    /** @var Embeddings lazily created by getEmbeddings() */
    protected $embeddings;
    /** @var AbstractStorage lazily created by getStorage() */
    protected $storage;

    /** @var string path of the JSON file where meta data on the last run is stored */
    protected $runDataFile;

    /**
     * Constructor. Initializes vendor autoloader
     *
     * Also derives the location of the run data file from the wiki's meta directory.
     */
    public function __construct()
    {
        require_once __DIR__ . '/vendor/autoload.php'; // FIXME obsolete from Kaos onwards
        global $conf;
        $this->runDataFile = $conf['metadir'] . '/aichat__run.json';
    }

    /**
     * Use the given CLI plugin for logging
     *
     * The logger is only passed on to embeddings/storage instances created after this call.
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Check if the current user is allowed to use the plugin (if it has been restricted)
     *
     * Access is granted when no auth backend is active or no restriction is configured.
     * Otherwise the user has to match the 'restrict' member list.
     *
     * @return bool
     */
    public function userMayAccess()
    {
        global $auth;
        global $USERINFO;
        global $INPUT;

        if (!$auth) return true;
        if (!$this->getConf('restrict')) return true;
        if (!isset($USERINFO)) return false;

        return auth_isMember(
            $this->getConf('restrict'),
            $INPUT->server->str('REMOTE_USER'),
            // fall back to "no groups" when the user info carries no 'grps' entry
            $USERINFO['grps'] ?? []
        );
    }

    /**
     * Access the configured chat model
     *
     * The class name is taken from the 'model' setting, relative to the plugin's
     * Model namespace. The instance is created once and then reused.
     *
     * @return AbstractModel
     * @throws \RuntimeException when the configured model class does not exist
     */
    public function getModel()
    {
        if (!$this->model instanceof AbstractModel) {
            $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model');

            if (!class_exists($class)) {
                throw new \RuntimeException('Configured model not found: ' . $class);
            }
            // FIXME for now we only have OpenAI models, so we can hardcode the auth setup
            $this->model = new $class([
                'key' => $this->getConf('openaikey'),
                'org' => $this->getConf('openaiorg')
            ]);
        }

        return $this->model;
    }

    /**
     * Access the Embeddings interface
     *
     * Created on first use from the model and the storage; receives the logger if one is set.
     *
     * @return Embeddings
     */
    public function getEmbeddings()
    {
        if (!$this->embeddings instanceof Embeddings) {
            $this->embeddings = new Embeddings($this->getModel(), $this->getStorage());
            if ($this->logger) {
                $this->embeddings->setLogger($this->logger);
            }
        }

        return $this->embeddings;
    }

    /**
     * Access the Storage interface
     *
     * The backend is picked by the first configured option in this order:
     * Pinecone, Chroma, Qdrant. Without any of them SQLite is used.
     *
     * @return AbstractStorage
     */
    public function getStorage()
    {
        if (!$this->storage instanceof AbstractStorage) {
            if ($this->getConf('pinecone_apikey')) {
                $this->storage = new PineconeStorage();
            } elseif ($this->getConf('chroma_baseurl')) {
                $this->storage = new ChromaStorage();
            } elseif ($this->getConf('qdrant_baseurl')) {
                $this->storage = new QdrantStorage();
            } else {
                $this->storage = new SQLiteStorage();
            }

            if ($this->logger) {
                $this->storage->setLogger($this->logger);
            }
        }

        return $this->storage;
    }

    /**
     * Ask a question with a chat history
     *
     * With a history, the question is first rephrased into a standalone question and the
     * most recent exchange is passed along as conversational context.
     *
     * @param string $question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askChatQuestion($question, $history = [])
    {
        if ($history) {
            $standaloneQuestion = $this->rephraseChatQuestion($question, $history);
            $prev = end($history);
        } else {
            $standaloneQuestion = $question;
            $prev = [];
        }
        return $this->askQuestion($standaloneQuestion, $prev);
    }

    /**
     * Ask a single standalone question
     *
     * Looks up similar chunks for the question and uses them as context in the 'question'
     * prompt. When nothing similar is found the 'noanswer' prompt is used instead.
     *
     * @param string $question
     * @param array $previous [user, ai] of the previous question
     * @return array ['question' => $question, 'answer' => $answer, 'sources' => $sources]
     * @throws Exception
     */
    public function askQuestion($question, $previous = [])
    {
        $similar = $this->getEmbeddings()->getSimilarChunks($question, $this->getLanguageLimit());
        if ($similar) {
            // each chunk is wrapped in its own fenced block
            $context = implode(
                "\n",
                array_map(static fn(Chunk $chunk) => "\n```\n" . $chunk->getText() . "\n```\n", $similar)
            );
            $prompt = $this->getPrompt('question', [
                'context' => $context,
                'language' => $this->getLanguagePrompt()
            ]);
        } else {
            $prompt = $this->getPrompt('noanswer') . ' ' . $this->getLanguagePrompt();
        }

        $messages = [
            [
                'role' => 'system',
                'content' => $prompt
            ],
            [
                'role' => 'user',
                'content' => $question
            ]
        ];

        if ($previous) {
            // the previous exchange is prepended, so the final order is:
            // previous user message, previous assistant message, system prompt, current question
            array_unshift($messages, [
                'role' => 'assistant',
                'content' => $previous[1]
            ]);
            array_unshift($messages, [
                'role' => 'user',
                'content' => $previous[0]
            ]);
        }

        $answer = $this->getModel()->getAnswer($messages);

        return [
            'question' => $question,
            'answer' => $answer,
            'sources' => $similar,
        ];
    }

    /**
     * Rephrase a question into a standalone question based on the chat history
     *
     * @param string $question The original user question
     * @param array[] $history The chat history [[user, ai], [user, ai], ...]
     * @return string The rephrased question
     * @throws Exception
     */
    public function rephraseChatQuestion($question, $history)
    {
        // go back in history as far as possible without hitting the token limit
        $chatHistory = '';
        $history = array_reverse($history);
        foreach ($history as $row) {
            // the limit is checked on what has been collected so far, before the next row
            // is added: the newest exchange is always included and the final history may
            // exceed the limit by one exchange
            if (
                count($this->getEmbeddings()->getTokenEncoder()->encode($chatHistory)) >
                $this->getModel()->getMaxRephrasingTokenLength()
            ) {
                break;
            }

            // rows are visited newest first, prepending keeps the text in chronological order
            $chatHistory =
                "Human: " . $row[0] . "\n" .
                "Assistant: " . $row[1] . "\n" .
                $chatHistory;
        }

        // ask openAI to rephrase the question
        $prompt = $this->getPrompt('rephrase', ['history' => $chatHistory, 'question' => $question]);
        $messages = [['role' => 'user', 'content' => $prompt]];
        return $this->getModel()->getRephrasedQuestion($messages);
    }

    /**
     * Load the given prompt template and fill in the variables
     *
     * Placeholders have the form {{NAME}}, where NAME is the upper cased key of $vars.
     *
     * @param string $type
     * @param string[] $vars
     * @return string
     * @throws \RuntimeException when the prompt template can not be read
     */
    protected function getPrompt($type, $vars = [])
    {
        $template = file_get_contents($this->localFN('prompt_' . $type));
        if ($template === false) {
            // do not silently continue with an empty prompt
            throw new \RuntimeException('Prompt template could not be read: ' . $type);
        }

        $replace = [];
        foreach ($vars as $key => $val) {
            $replace['{{' . strtoupper($key) . '}}'] = $val;
        }

        return strtr($template, $replace);
    }

    /**
     * Construct the prompt to define the answer language
     *
     * @return string
     */
    protected function getLanguagePrompt()
    {
        global $conf;
        $isoLangnames = include(__DIR__ . '/lang/languages.php');

        $currentLang = $isoLangnames[$conf['lang']] ?? 'English';

        if ($this->getConf('preferUIlanguage') > AIChat::LANG_AUTO_ALL) {
            if (isset($isoLangnames[$conf['lang']])) {
                $languagePrompt = 'Always answer in ' . $isoLangnames[$conf['lang']] . '.';
                return $languagePrompt;
            }
        }

        // the trailing space keeps the two sentences from running into each other
        $languagePrompt = 'Always answer in the user\'s language. ' .
            "If you are unsure about the language, speak $currentLang.";
        return $languagePrompt;
    }

    /**
     * Should sources be limited to current language?
     *
     * @return string The current language code or empty string
     */
    public function getLanguageLimit()
    {
        if ($this->getConf('preferUIlanguage') >= AIChat::LANG_UI_LIMITED) {
            global $conf;
            return $conf['lang'];
        } else {
            return '';
        }
    }

    /**
     * Store info about the last run
     *
     * The data is written as pretty printed JSON to the run data file.
     *
     * @param array $data
     * @return void
     */
    public function setRunData(array $data)
    {
        file_put_contents($this->runDataFile, json_encode($data, JSON_PRETTY_PRINT));
    }

    /**
     * Get info about the last run
     *
     * @return array The stored data; empty when the file is missing, unreadable or not a JSON array/object
     */
    public function getRunData()
    {
        if (!file_exists($this->runDataFile)) {
            return [];
        }

        $json = file_get_contents($this->runDataFile);
        if ($json === false) {
            return [];
        }

        // json_decode() yields null on invalid input; callers are promised an array
        $data = json_decode($json, true);
        return is_array($data) ? $data : [];
    }
}