<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin: creates embeddings, runs storage
 * maintenance, and offers several query and debugging commands.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper plugin and register this CLI as its logger
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('info', 'Get Info about the vector storage');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for this CLI run
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments, fall back to defaults
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively: the key gets its own row and the
     * nested values follow, indented by two more columns per level.
     *
     * @param array $data
     * @param int $level current nesting depth, used for the indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // use the cleaned ID for the index lookup AND the storage lookup, so that
        // input like "Wiki:Start" cannot match the index but miss the storage
        $page = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        $pos = array_search($page, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): $pos * 100 is presumably the first chunk ID reserved for
        // this page - confirm against the storage implementation
        $chunks = $storage->getPageChunks($page, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream is closed. Each
     * interpreted question and its answer are appended to the history that is
     * passed along with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a truthiness check would end the session on the input "0"
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * Reports peak memory usage and the elapsed time afterwards.
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages in a playground or sandbox namespace are skipped. Reports peak memory
     * usage and the elapsed time afterwards.
     *
     * @param bool $clear value of the 'clear' option, passed on to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector name of the vector file to write
     * @param string $meta name of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, followed by its sources and the usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered. When the
     * input stream ends (EOF, e.g. Ctrl-D or an exhausted pipe) or cannot be
     * read, an empty string is returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string when no more input is available
     */
    protected function readLine($prompt)
    {
        $value = '';

        while ($value === '') {
            echo $prompt;
            echo ': ';

            // read from the shared STDIN stream: re-opening php://stdin for each prompt
            // would throw away input that was already buffered (piped multi-line input)
            $line = fgets(STDIN);
            if ($line === false) {
                // EOF or read error: there will never be more input, stop asking
                return '';
            }
            $value = trim($line);
        }

        return $value;
    }
}