<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. Every sub command registered in
 * setup() is dispatched by main() to one of the protected methods below, which
 * in turn delegate the real work to the aichat helper plugin.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as its logger
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('info', 'Get Info about the vector storage');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for this CLI run; ini_set() takes the value as a string
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments, fall back to the documented defaults
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model followed by the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Array values are printed as a heading row and their contents are printed
     * recursively, indented by two more columns per nesting level.
     *
     * @param array $data
     * @param int $level current nesting depth, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // use the cleaned ID for the index AND the storage lookup so both agree on the page name
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: numeric looking page IDs must not match loosely
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): the first chunk ID is derived as index position * 100 - confirm against the embeddings code
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. Each interpreted
     * question and its answer are appended to the history that is passed along
     * with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare explicitly: a truthiness check would end the session on the input "0"
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->printRuntimeStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the "clear" option, handed on to createNewIndex()
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->printRuntimeStats($start);
    }

    /**
     * Report peak memory usage and the time elapsed since the given start
     *
     * @param int $start unix timestamp taken before the work began
     * @return void
     */
    protected function printRuntimeStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector name of the vector file to write
     * @param string $meta name of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, the sources it is based on and the usage statistics.
     *
     * @param array $answer needs the keys "answer" and "sources"
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        // single quoted on purpose: "${cost}" is a literal dollar sign followed by the {cost} placeholder
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered. When the
     * input stream ends (EOF, e.g. Ctrl-D or exhausted piped input) or cannot be
     * read, an empty string is returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty only when no more input is available
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // EOF or read error: stop asking, the caller receives an empty string
                break;
            }
            $value = trim($line);
        }
        fclose($fh);

        return $value;
    }
}