<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. Each registered sub command is
 * dispatched in main() to one protected method of this class; the actual work is
 * delegated to the aichat helper plugin and the objects it hands out.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI object as its logger
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand(
            'maintenance',
            'Run storage maintenance. Refer to the documentation for details.'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('info', 'Get Info about the vector storage');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand(
            'tsv',
            'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.'
        );
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for this CLI run
        ini_set('memory_limit', -1);

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments, fall back to defaults
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model together with the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed as a heading row (key only) followed by their own
     * entries, indented by two more columns per nesting level.
     *
     * @param array $data
     * @param int $level current nesting depth, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * The page is looked up in the search index; its position in the index list
     * multiplied by 100 is passed to the storage as second argument.
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // use the normalized ID for the index lookup AND the storage query so both agree
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: numeric looking IDs must not match each other loosely
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. Every interpreted
     * question and its answer are appended to the history that is passed along
     * with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // readLine() only returns '' on end of input; compare explicitly so that
        // a falsy but valid input like "0" does not terminate the session
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->printRuntimeStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages in the playground and sandbox namespaces are skipped.
     *
     * @param bool $clear value of the --clear option, handed to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->printRuntimeStats($start);
    }

    /**
     * Log peak memory usage and the time elapsed since the given start
     *
     * @param int $start unix timestamp taken before the task was started
     * @return void
     */
    private function printRuntimeStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector path of the vector file to write
     * @param string $meta path of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, the sources it is based on and the usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * One line per chunk: page, chunk id and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics of the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line has been entered. When the input
     * stream ends (Ctrl-D, closed pipe) or can not be opened, an empty string is
     * returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string on end of input
     */
    protected function readLine($prompt)
    {
        // open the stream once for all attempts instead of once per prompt
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        try {
            while ($value === '') {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // EOF or read error: there will never be more input
                    break;
                }
                $value = trim($line);
            }
        } finally {
            fclose($fh);
        }

        return $value;
    }
}