<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line access to the aichat plugin: creating embeddings, storage maintenance,
 * similarity lookups, asking single questions, interactive chat and several debugging helpers.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper plugin and register this CLI as its logger
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('info', 'Get Info about the vector storage');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');
        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for this CLI run
        ini_set('memory_limit', -1);
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0], $options->getOpt('dump'));
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments, fall back to the documented defaults
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model together with the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Array values are printed as a heading row followed by their own entries,
     * indented by two more columns per nesting level.
     *
     * @param array $data
     * @param int $level current nesting depth, used for the indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * The page ID is cleaned once and that cleaned ID is used for both the index lookup
     * and the storage lookup, so both operate on the same identifier.
     *
     * @param string $page
     * @param bool $dump also print the found chunks as pretty printed JSON
     * @return void
     */
    protected function page($page, $dump = false)
    {
        $page = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: page IDs are strings and must not match loosely (e.g. numeric strings)
        $pos = array_search($page, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // the second argument is derived from the page's position in the index
        $chunks = $storage->getPageChunks($page, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
            if ($dump) {
                echo json_encode($chunks, JSON_PRETTY_PRINT);
            }
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends. Each interpreted question and its
     * answer are appended to the history that is passed along with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare explicitly against '' so that an input like "0" is treated as a question;
        // readLine() only returns '' when no more input is available
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * Reports peak memory usage and the elapsed time in minutes afterwards.
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Reports peak memory usage and the elapsed time in minutes afterwards.
     *
     * @param bool $clear value of the "clear" option, handed on to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector name of the vector file to write
     * @param string $meta name of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {

        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, then its sources, then the usage statistics.
     *
     * @param array $answer needs the keys "answer" and "sources"
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * One line per chunk: page, chunk id and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line was entered. When no more input is
     * available (end of input or STDIN can not be opened) an empty string is returned
     * instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string when the input ended
     */
    protected function readLine($prompt)
    {
        $value = '';

        while ($value === '') {
            echo $prompt;
            echo ': ';

            $fh = fopen('php://stdin', 'r');
            if ($fh === false) {
                return '';
            }
            $line = fgets($fh);
            fclose($fh);

            // fgets() returns false when there is nothing left to read, stop asking then
            if ($line === false) {
                return '';
            }

            $value = trim($line);
        }

        return $value;
    }
}