<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. Every sub command registered in
 * setup() is dispatched by main() to one of the protected methods below.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI instance as its logger
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('info', 'Get Info about the vector storage');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand(
            'tsv',
            'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.'
        );
        // both file names are optional, main() falls back to defaults
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for the whole run; ini values are strings
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model together with the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed as a heading row followed by their own rows,
     * indented by two more columns per nesting level.
     *
     * @param array $data
     * @param int $level current nesting depth, used for the indentation only
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        // indentation column, key column, value column taking the remaining width
        $columns = [$level * 2, 15, '*'];

        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    $columns,
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
                continue;
            }

            echo $tf->format(
                $columns,
                ['', $key, $value],
                [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
            );
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * The page ID is normalized once and the normalized form is used for both the
     * index lookup and the storage query, so both refer to the same page.
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        $page = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        $pos = array_search($page, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): the second argument is the page's index position times 100,
        // presumably the first chunk ID assigned to that page - confirm against the storage
        $chunks = $storage->getPageChunks($page, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends. Each interpreted question
     * and its answer are appended to the history passed along with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a question like "0" must not end the session
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->printRuntimeStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the --clear option, handed on to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->printRuntimeStats($start);
    }

    /**
     * Report peak memory usage and the time spent since the given start
     *
     * @param int $start unix timestamp taken before the work began
     * @return void
     */
    protected function printRuntimeStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector name of the vector file to write
     * @param string $meta name of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs to provide the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * One line per chunk: page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics of the model
     *
     * NOTE(review): assumes getUsageStats() returns the keys used as placeholders
     * below (requests, time, tokens, cost) - confirm against the model class.
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered.
     *
     * @param string $prompt
     * @return string the trimmed input, or an empty string when no more input is available
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        try {
            do {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // EOF (Ctrl-D, exhausted pipe) or read error: stop instead of prompting forever
                    return '';
                }
                $value = trim($line);
            } while ($value === '');
        } finally {
            fclose($fh);
        }

        return $value;
    }
}