<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;


/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. All real work is delegated to
 * the plugin's helper component; this class only registers the sub commands,
 * dispatches them and prints the results.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the helper component and register this CLI as its logger
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c', false, 'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('info', 'Get Info about the vector storage');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for all sub commands
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments, fall back to the documented defaults
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively: the key is printed on a line of its own
     * and the nested values follow, indented by two more columns per level.
     *
     * @param array $data
     * @param int $level current nesting depth, used for the indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * The page ID is cleaned once and that cleaned ID is used for both the index
     * lookup and the storage query, so both always refer to the same page.
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): presumably chunk IDs of a page start at its index position * 100 - confirm in the storage
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. Each interpreted
     * question and its answer are appended to the history passed to the next call.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // readLine() only returns an empty string on end of input; compare explicitly
        // so that an input like "0" is not mistaken for the end of the session
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * If the helper reports a language limit, it is announced and passed on to the lookup.
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * Reports peak memory usage and elapsed time when done.
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages in a playground or sandbox namespace are skipped. Reports peak memory
     * usage and elapsed time when done.
     *
     * @param mixed $clear value of the "clear" option, passed on to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector name of the vector file to write
     * @param string $meta name of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys "answer" and "sources"
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * One line per chunk: page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line has been entered. When the input
     * stream ends (EOF) or cannot be opened, an empty string is returned instead
     * of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty only when no more input is available
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // end of input: stop instead of re-prompting endlessly
                break;
            }
            $value = trim($line);
        }
        fclose($fh);

        return $value;
    }
}