<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\AbstractCLI;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\aichat\ModelFactory;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Registers the command line sub commands of the plugin (embed, maintenance,
 * similar, ask, chat, models, info, split, page, tsv) and dispatches them to
 * the methods below.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends AbstractCLI
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        parent::setup($options);

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerOption(
            'model',
            'Overrides the chat and rephrasing model settings and uses this model instead',
            '',
            'model'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('models', 'List available models');

        $options->registerCommand('info', 'Get Info about the vector storage and other stats');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');
        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        parent::main($options);

        $model = $options->getOpt('model');
        if ($model) {
            // NOTE(review): 'rephasemodel' looks like a typo for 'rephrasemodel'. The real
            // configuration key is not visible from this file, so the string is left
            // untouched - confirm against the plugin's conf/default.php.
            $this->helper->updateConfig(
                ['chatmodel' => $model, 'rephasemodel' => $model]
            );
        }

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'models':
                $this->models();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0], $options->getOpt('dump'));
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured models, the recorded run data and the storage statistics
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'chat model' => $this->getConf('chatmodel'),
            'embed model' => $this->getConf('embedmodel'),
        ];
        $stats = array_merge(
            $stats,
            // run data values are the time() stamps written by the embed/maintenance commands
            array_map('dformat', $this->helper->getRunData()),
            $this->helper->getStorage()->statistics()
        );
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively, indented by two columns per level.
     *
     * @param array $data
     * @param int $level current nesting depth, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                // heading row for the nested section, followed by its contents
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page the page ID to check
     * @param bool $dump also print the found chunks as pretty printed JSON
     * @return void
     */
    protected function page($page, $dump = false)
    {
        // normalize once so that the index lookup and the storage lookup use the same ID
        $page = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        $pos = array_search($page, $pages);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): $pos * 100 is presumably the first chunk ID reserved for this
        // page - confirm against the storage implementation
        $chunks = $storage->getPageChunks($page, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
            if ($dump) {
                echo json_encode($chunks, JSON_PRETTY_PRINT);
            }
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $chunks = $this->helper->getEmbeddings()->createPageChunks($page, 0);
        foreach ($chunks as $chunk) {
            echo $chunk->getText();
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. The usage statistics
     * are reset before each question, so the printed usage refers to that question only.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // readLine() only returns an empty string when no more input is available.
        // Comparing against '' (instead of relying on truthiness) keeps a question
        // like "0" from silently ending the session.
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getChatModel()->resetUsageStats();
            $this->helper->getRephraseModel()->resetUsageStats();
            $this->helper->getEmbeddingModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Print information about the available models
     *
     * The model name is printed in green when the model info has a truthy
     * 'instance' entry and in red otherwise.
     *
     * @return void
     */
    protected function models()
    {
        $result = (new ModelFactory($this->conf))->getModels();

        $td = new TableFormatter($this->colors);
        $cols = [30, 20, 20, '*'];
        echo "==== Chat Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['chat'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
                    $info['description'] . "\n"
                ],
                [
                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $cols = [30, 10, 10, 10, '*'];
        echo "==== Embedding Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Dimensions', 'Description'],
            [
                Colors::C_LIGHTBLUE,
                Colors::C_LIGHTBLUE,
                Colors::C_LIGHTBLUE,
                Colors::C_LIGHTBLUE,
                Colors::C_LIGHTBLUE
            ]
        );
        foreach ($result['embedding'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf("%7d", $info['inputTokens']),
                    sprintf("%.2f", $info['inputTokenPrice']),
                    $info['dimensions'],
                    $info['description'] . "\n"
                ],
                [
                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $this->colors->ptln('Current prices may differ', Colors::C_RED);
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * Reports peak memory and run time and records the time of the run in the run data.
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data['maintenance ran at'] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Reports peak memory and run time and records the time of the run in the run data.
     *
     * @param bool $clear clear all existing embeddings before creating new ones
     * @return void
     */
    protected function createEmbeddings($clear)
    {
        [$skipRE, $matchRE] = $this->getRegexps();

        $start = time();
        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data['embed ran at'] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector file name to write the vectors to
     * @param string $meta file name to write the meta data to
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys 'answer' (text) and 'sources' (list of chunks)
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the combined usage statistics of the chat, rephrase and embedding models
     *
     * @return void
     */
    protected function printUsage()
    {
        $chat = $this->helper->getChatModel()->getUsageStats();
        $rephrase = $this->helper->getRephraseModel()->getUsageStats();
        $embed = $this->helper->getEmbeddingModel()->getUsageStats();

        $this->info(
            'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',
            [
                'requests' => $chat['requests'] + $rephrase['requests'] + $embed['requests'],
                'time' => $chat['time'] + $rephrase['time'] + $embed['time'],
                // sum all three models; the chat model was previously counted twice
                // here while the rephrase model was left out
                'tokens' => $chat['tokens'] + $rephrase['tokens'] + $embed['tokens'],
                'cost' => $chat['cost'] + $rephrase['cost'] + $embed['cost'],
            ]
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Empty lines are ignored and the prompt is repeated until a non-empty value
     * was entered.
     *
     * @param string $prompt
     * @return string the trimmed input, an empty string when STDIN can not be read (anymore)
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        try {
            while ($value === '') {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // EOF (Ctrl-D or exhausted pipe): stop instead of prompting forever
                    echo "\n";
                    break;
                }
                $value = trim($line);
            }
        } finally {
            fclose($fh);
        }

        return $value;
    }

    /**
     * Read the skip and match regex from the config
     *
     * Ensures the regular expressions are valid. Invalid expressions are reported
     * and replaced by an empty string.
     *
     * @return string[] [$skipRE, $matchRE]
     */
    protected function getRegexps()
    {
        $skip = $this->getConf('skipRegex');
        $skipRE = '';
        $match = $this->getConf('matchRegex');
        $matchRE = '';

        if ($skip) {
            $skipRE = '/' . $skip . '/';
            // a test match against the empty string returns false for broken expressions
            if (@preg_match($skipRE, '') === false) {
                $this->error(preg_last_error_msg());
                $this->error('Invalid regular expression in $conf[\'skipRegex\']. Ignored.');
                $skipRE = '';
            } else {
                $this->success('Skipping pages matching ' . $skipRE);
            }
        }

        if ($match) {
            $matchRE = '/' . $match . '/';
            if (@preg_match($matchRE, '') === false) {
                $this->error(preg_last_error_msg());
                $this->error('Invalid regular expression in $conf[\'matchRegex\']. Ignored.');
                $matchRE = '';
            } else {
                $this->success('Only indexing pages matching ' . $matchRE);
            }
        }
        return [$skipRE, $matchRE];
    }
}