<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\AbstractCLI;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\ModelFactory;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Registers the command line sub commands of the plugin and dispatches them
 * to the protected handler methods below.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends AbstractCLI
{
    /**
     * NOTE(review): not initialized in this class; presumably set up by the parent class - confirm
     *
     * @var helper_plugin_aichat
     */
    protected $helper;

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        parent::setup($options);

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerOption(
            'model',
            'Overrides the chat and rephrasing model settings and uses this model instead',
            '',
            'model'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('models', 'List available models');

        $options->registerCommand('info', 'Get Info about the vector storage and other stats');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');
        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        parent::main($options);

        $model = $options->getOpt('model');
        if ($model) {
            // The option is documented to override both the chat and the rephrasing model.
            // NOTE(review): the key used to be spelled 'rephasemodel', which looks like a typo;
            // confirm 'rephrasemodel' against the plugin's configuration defaults.
            $this->helper->updateConfig(
                ['chatmodel' => $model, 'rephrasemodel' => $model]
            );
        }

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'models':
                $this->models();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0], $options->getOpt('dump'));
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments, fall back to defaults in the current directory
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured models, the stored run data and the storage statistics
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'chat model' => $this->getConf('chatmodel'),
            'embed model' => $this->getConf('embedmodel'),
        ];
        $stats = array_merge(
            $stats,
            // every run data value is passed through dformat() for display
            array_map('dformat', $this->helper->getRunData()),
            $this->helper->getStorage()->statistics()
        );
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively, indented by two columns per level.
     *
     * @param array $data
     * @param int $level current nesting level, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                // print the key as a heading row, then descend into the nested data
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page the page ID, it is cleaned before use
     * @param bool $dump when true, the found chunks are printed as pretty printed JSON
     * @return void
     */
    protected function page($page, $dump = false)
    {
        // use the same cleaned ID for the index lookup and the storage query
        $page = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        $pos = array_search($page, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): presumably position * 100 is the ID of the page's first chunk - confirm in the storage
        $chunks = $storage->getPageChunks($page, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
            if ($dump) {
                echo json_encode($chunks, JSON_PRETTY_PRINT);
            }
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends. The usage statistics of all
     * models are reset before each question, so the printed usage covers that question only.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a question like "0" must not end the session
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getChatModel()->resetUsageStats();
            $this->helper->getRephraseModel()->resetUsageStats();
            $this->helper->getEmbeddingModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Print information about the available models
     *
     * Model names are printed in green when the model info has a truthy 'instance' entry, in red otherwise.
     *
     * @return void
     */
    protected function models()
    {
        $result = (new ModelFactory($this->conf))->getModels();

        $td = new TableFormatter($this->colors);
        $cols = [30, 20, 20, '*'];
        echo "==== Chat Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['chat'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
                    $info['description'] . "\n"
                ],
                [
                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $cols = [30, 10, 10, 10, '*'];
        echo "==== Embedding Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Dimensions', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['embedding'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf("%7d", $info['inputTokens']),
                    sprintf("%.2f", $info['inputTokenPrice']),
                    $info['dimensions'],
                    $info['description'] . "\n"
                ],
                [
                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $this->colors->ptln('Current prices may differ', Colors::C_RED);
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * Reports memory and time used and records the time of this run in the run data.
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data['maintenance ran at'] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Reports memory and time used and records the time of this run in the run data.
     *
     * @param bool $clear value of the 'clear' option, passed on to the index creation
     * @return void
     */
    protected function createEmbeddings($clear)
    {
        [$skipRE, $matchRE] = $this->getRegexps();

        $start = time();
        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data['embed ran at'] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector name of the vector file to write
     * @param string $meta name of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the combined usage statistics of the chat, rephrase and embedding models
     *
     * @return void
     */
    protected function printUsage()
    {
        $chat = $this->helper->getChatModel()->getUsageStats();
        $rephrase = $this->helper->getRephraseModel()->getUsageStats();
        $embed = $this->helper->getEmbeddingModel()->getUsageStats();

        $this->info(
            'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',
            [
                'requests' => $chat['requests'] + $rephrase['requests'] + $embed['requests'],
                'time' => $chat['time'] + $rephrase['time'] + $embed['time'],
                // tokens and cost have to include the rephrase model, just like requests and time do
                'tokens' => $chat['tokens'] + $rephrase['tokens'] + $embed['tokens'],
                'cost' => $chat['cost'] + $rephrase['cost'] + $embed['cost'],
            ]
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered.
     *
     * @param string $prompt
     * @return string the trimmed input, an empty string when standard input can not be read (e.g. end of input)
     */
    protected function readLine($prompt)
    {
        $value = '';

        while ($value === '') {
            echo $prompt;
            echo ': ';

            $fh = fopen('php://stdin', 'r');
            if ($fh === false) {
                return '';
            }
            $line = fgets($fh);
            fclose($fh);

            if ($line === false) {
                // end of input: stop asking, otherwise this loop would never terminate
                return '';
            }
            $value = trim($line);
        }

        return $value;
    }

    /**
     * Read the skip and match regex from the config
     *
     * Ensures the regular expressions are valid. An invalid expression is reported and
     * replaced by an empty string.
     *
     * @return string[] [$skipRE, $matchRE]
     */
    protected function getRegexps()
    {
        $skip = $this->getConf('skipRegex');
        $skipRE = '';
        $match = $this->getConf('matchRegex');
        $matchRE = '';

        if ($skip) {
            $skipRE = '/' . $skip . '/';
            // a test match against an empty string returns false when the pattern is broken
            if (@preg_match($skipRE, '') === false) {
                $this->error(preg_last_error_msg());
                $this->error('Invalid regular expression in $conf[\'skipRegex\']. Ignored.');
                $skipRE = '';
            } else {
                $this->success('Skipping pages matching ' . $skipRE);
            }
        }

        if ($match) {
            $matchRE = '/' . $match . '/';
            if (@preg_match($matchRE, '') === false) {
                $this->error(preg_last_error_msg());
                $this->error('Invalid regular expression in $conf[\'matchRegex\']. Ignored.');
                $matchRE = '';
            } else {
                $this->success('Only indexing pages matching ' . $matchRE);
            }
        }
        return [$skipRE, $matchRE];
    }
}