<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin: creates embeddings, runs storage
 * maintenance and allows querying the chatbot from the terminal.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the helper plugin and register this CLI as its logger
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand(
            'maintenance',
            'Run storage maintenance. Refer to the documentation for details.'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for this CLI run
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively, indented by two columns per level.
     *
     * @param array $data
     * @param int $level current nesting depth
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        $columns = [$level * 2, 15, '*'];

        foreach ($data as $key => $value) {
            if (is_array($value)) {
                // heading row for the nested section, followed by its indented content
                echo $tf->format(
                    $columns,
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
                continue;
            }

            echo $tf->format(
                $columns,
                ['', $key, $value],
                [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
            );
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // normalise once so the index lookup and the storage lookup use the same ID
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: numeric looking page IDs must not match loosely
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): presumably a page's chunk IDs start at its index position * 100 - confirm in storage
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends (e.g. Ctrl-D).
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a question like "0" is valid input, only EOF ends the chat
        while (($q = $this->readLine('Your Question')) !== '') {
            // usage statistics are reported per question
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->reportRunStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear clear all existing embeddings before creating new ones
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->reportRunStats($start);
    }

    /**
     * Log peak memory usage and the time elapsed since the given start
     *
     * @param int $start unix timestamp taken when the task was started
     * @return void
     */
    protected function reportRunStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Outputs the answer text, the sources it is based on and the usage statistics.
     *
     * @param array $answer needs to contain the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed as one line with page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics of the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Empty lines are ignored and the prompt is shown again.
     *
     * @param string $prompt
     * @return string the trimmed input, or an empty string when STDIN is closed or cannot be read
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // EOF or read error: stop prompting instead of looping forever
                break;
            }
            $value = trim($line);
        }
        fclose($fh);

        return $value;
    }
}