<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin: builds and maintains the
 * embedding storage, runs similarity look-ups and questions against the
 * configured models and offers a few debugging commands.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as its logger
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('models', 'List available models');

        $options->registerCommand('info', 'Get Info about the vector storage and other stats');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');
        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        $this->loadConfig();
        // lift the memory limit for this CLI run
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'models':
                $this->models();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0], $options->getOpt('dump'));
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured models, the recorded run data and the storage statistics
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'chat model' => $this->getConf('chatmodel'),
            'embed model' => $this->getConf('embedmodel'),
        ];
        $stats = array_merge(
            $stats,
            // every run data value is passed through dformat() before printing
            array_map('dformat', $this->helper->getRunData()),
            $this->helper->getStorage()->statistics()
        );
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Array values are printed as a heading row followed by their own entries,
     * indented one level deeper.
     *
     * @param array $data
     * @param int $level current nesting depth, each level indents by two columns
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @param bool $dump when true, the found chunks are printed as pretty printed JSON
     * @return void
     */
    protected function page($page, $dump = false)
    {
        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: numeric looking page IDs must not match loosely
        $pos = array_search(cleanID($page), $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // the second argument is derived from the page's position in the index
        $chunks = $storage->getPageChunks($page, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
            if ($dump) {
                echo json_encode($chunks, JSON_PRETTY_PRINT);
            }
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends. The interpreted question
     * and the answer of every round are passed as history to the next round.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $this->enableModelDebugging();

        $history = [];
        // readLine() only returns '' when the input ended; compare explicitly so
        // that an input of "0" is treated as a question and does not end the chat
        while (($q = $this->readLine('Your Question')) !== '') {
            // usage statistics are reported per question
            $this->helper->getChatModel()->resetUsageStats();
            $this->helper->getRephraseModel()->resetUsageStats();
            $this->helper->getEmbedModel()->resetUsageStats();

            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * List the models described in the Model/<provider>/models.json files
     *
     * Each model is instantiated with the current configuration; the model name
     * is printed in green when that succeeds and in red when it throws.
     *
     * @return void
     */
    protected function models()
    {
        $result = [
            'chat' => [],
            'embedding' => [],
        ];

        // glob() may return false on error
        $jsons = glob(__DIR__ . '/Model/*/models.json') ?: [];
        foreach ($jsons as $json) {
            $models = json_decode((string)file_get_contents($json), true);
            if (!is_array($models)) {
                $this->error('Could not read model definitions from {file}', ['file' => $json]);
                continue;
            }

            // the directory name doubles as the namespace of the model classes
            $namespace = basename(dirname($json));
            foreach ($models as $type => $model) {
                $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\' . ucfirst($type) . 'Model';
                foreach ($model as $name => $info) {
                    try {
                        new $class($name, $this->conf);
                        $info['confok'] = true;
                    } catch (Exception $e) {
                        $info['confok'] = false;
                    }

                    $result[$type]["$namespace $name"] = $info;
                }
            }
        }

        $td = new TableFormatter($this->colors);
        $cols = [30, 20, 20, '*'];
        echo "==== Chat Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['chat'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
                    $info['description'] . "\n"
                ],
                [
                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $cols = [30, 10, 10, 10, '*'];
        echo "==== Embedding Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Dimensions', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['embedding'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf("%7d", $info['inputTokens']),
                    sprintf("%.2f", $info['inputTokenPrice']),
                    $info['dimensions'],
                    $info['description'] . "\n"
                ],
                [
                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $this->colors->ptln('Current prices may differ', Colors::C_RED);
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $this->enableModelDebugging();

        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->finishRun($start, 'maintenance ran at');
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the "clear" option, handed on to the index creation
     * @return void
     */
    protected function createEmbeddings($clear)
    {
        [$skipRE, $matchRE] = $this->getRegexps();

        $start = time();
        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
        $this->finishRun($start, 'embed ran at');
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector name of the vector file to write
     * @param string $meta name of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the combined usage statistics of the chat, rephrase and embed models
     *
     * @return void
     */
    protected function printUsage()
    {
        $chat = $this->helper->getChatModel()->getUsageStats();
        $rephrase = $this->helper->getRephraseModel()->getUsageStats();
        $embed = $this->helper->getEmbedModel()->getUsageStats();

        $this->info(
            'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',
            [
                'requests' => $chat['requests'] + $rephrase['requests'] + $embed['requests'],
                'time' => $chat['time'] + $rephrase['time'] + $embed['time'],
                // every figure is the sum over all three models
                'tokens' => $chat['tokens'] + $rephrase['tokens'] + $embed['tokens'],
                'cost' => $chat['cost'] + $rephrase['cost'] + $embed['cost'],
            ]
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line has been entered.
     *
     * @param string $prompt
     * @return string the trimmed input, or an empty string when the input ended (EOF)
     */
    protected function readLine($prompt)
    {
        $value = '';

        while ($value === '') {
            echo $prompt;
            echo ': ';

            $fh = fopen('php://stdin', 'r');
            if ($fh === false) {
                return '';
            }
            $line = fgets($fh);
            fclose($fh);

            // fgets() returns false on EOF; asking again would loop forever
            if ($line === false) {
                return '';
            }

            $value = trim($line);
        }

        return $value;
    }

    /**
     * Read the skip and match regex from the config
     *
     * Ensures the regular expressions are valid
     *
     * @return string[] [$skipRE, $matchRE]
     */
    protected function getRegexps()
    {
        $skipRE = $this->compileConfRegex('skipRegex', 'Skipping pages matching ');
        $matchRE = $this->compileConfRegex('matchRegex', 'Only indexing pages matching ');

        return [$skipRE, $matchRE];
    }

    /**
     * Turn on debugging for all three models when debug logging is enabled
     *
     * @return void
     */
    private function enableModelDebugging()
    {
        if (!$this->loglevel['debug']['enabled']) {
            return;
        }

        $this->helper->getChatModel()->setDebug(true);
        $this->helper->getRephraseModel()->setDebug(true);
        $this->helper->getEmbedModel()->setDebug(true);
    }

    /**
     * Report memory and time consumption of a run and remember when it finished
     *
     * @param int $start unix timestamp taken before the run started
     * @param string $runKey key under which the finish time is stored in the run data
     * @return void
     */
    private function finishRun($start, $runKey)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data[$runKey] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Build a delimited regular expression from a config setting and validate it
     *
     * @param string $confKey name of the config setting holding the bare pattern
     * @param string $successPrefix message printed in front of the valid expression
     * @return string the expression wrapped in slashes, empty when unset or invalid
     */
    private function compileConfRegex($confKey, $successPrefix)
    {
        $pattern = $this->getConf($confKey);
        if (!$pattern) {
            return '';
        }

        $regex = '/' . $pattern . '/';
        // @ hides the compile warning, the failure is reported right below
        if (@preg_match($regex, '') === false) {
            $this->error(preg_last_error_msg());
            $this->error('Invalid regular expression in $conf[\'' . $confKey . '\']. Ignored.');
            return '';
        }

        $this->success($successPrefix . $regex);
        return $regex;
    }
}