<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end to the aichat helper: creates embeddings, runs storage
 * maintenance, queries the storage and offers one-shot and interactive chat.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as its logger
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('info', 'Get Info about the vector storage and other stats');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');
        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for the duration of the CLI run
        ini_set('memory_limit', -1);
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0], $options->getOpt('dump'));
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments, fall back to defaults
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model, the formatted run data and the storage statistics
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge(
            $stats,
            array_map('dformat', $this->helper->getRunData()),
            $this->helper->getStorage()->statistics()
        );
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively, indented by two columns per level.
     *
     * @param array $data
     * @param int $level current nesting depth
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                // print the key as a heading, then descend into the nested data
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @param bool $dump print the found chunks as pretty printed JSON
     * @return void
     */
    protected function page($page, $dump = false)
    {
        // use the cleaned ID for the index lookup AND the storage lookup, otherwise
        // an ID like "Wiki:Page" is found in the index but never matches any chunk
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: numeric looking IDs ("1e3" vs "1000") must not match each other
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): $pos * 100 is presumably the first chunk ID reserved for this
        // page - confirm against the storage implementation
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
            if ($dump) {
                echo json_encode($chunks, JSON_PRETTY_PRINT);
            }
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until readLine() returns a falsy value, which
     * happens when the input ends (EOF) or the user enters "0".
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        if ($this->loglevel['debug']['enabled']) {
            $this->helper->getChatModel()->setDebug(true);
        }

        $history = [];
        while ($q = $this->readLine('Your Question')) {
            // usage statistics are reported per question
            $this->helper->getChatModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        if ($this->loglevel['debug']['enabled']) {
            $this->helper->getChatModel()->setDebug(true);
        }

        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * Reports peak memory and time spent, then stores the current time in the
     * helper's run data.
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data['maintenance ran at'] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Reports peak memory and time spent, then stores the current time in the
     * helper's run data.
     *
     * @param bool $clear value of the --clear option, passed on to createNewIndex()
     * @return void
     */
    protected function createEmbeddings($clear)
    {
        [$skipRE, $matchRE] = $this->getRegexps();

        $start = time();
        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data['embed ran at'] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector name of the vector file
     * @param string $meta name of the meta file
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics of the chat model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getChatModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line was entered. When the input
     * ends (EOF) or can not be read, an empty string is returned instead of
     * prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string when no more input is available
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // EOF or read error: stop asking, the caller sees an empty answer
                break;
            }
            $value = trim($line);
        }
        fclose($fh);

        return $value;
    }

    /**
     * Read the skip and match regex from the config
     *
     * Ensures the regular expressions are valid
     *
     * @return string[] [$skipRE, $matchRE]
     */
    protected function getRegexps()
    {
        return [
            $this->loadRegex('skipRegex', 'Skipping pages matching '),
            $this->loadRegex('matchRegex', 'Only indexing pages matching '),
        ];
    }

    /**
     * Build and validate the delimited regular expression for one config setting
     *
     * @param string $confKey name of the config setting holding the pattern
     * @param string $successPrefix message prefix logged when the pattern is usable
     * @return string the pattern wrapped in slashes, empty string when unset or invalid
     */
    private function loadRegex($confKey, $successPrefix)
    {
        $pattern = $this->getConf($confKey);
        if (!$pattern) {
            return '';
        }

        $regex = '/' . $pattern . '/';
        // a test match against an empty string returns false when the pattern does not compile
        if (@preg_match($regex, '') === false) {
            $this->error(preg_last_error_msg());
            $this->error('Invalid regular expression in $conf[\'' . $confKey . '\']. Ignored.');
            return '';
        }

        $this->success($successPrefix . $regex);
        return $regex;
    }
}