<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;
use splitbrain\phpcli\TableFormatter;


/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat helper: creates embeddings, runs storage
 * maintenance, queries similar chunks and asks questions (one-off or interactively).
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as its logger
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // -1 disables PHP's memory limit for this CLI run
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                // no (or unknown) command given
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Array values are printed as a heading row followed by their own entries,
     * indented by two more columns per nesting level.
     *
     * @param array $data
     * @param int $level current nesting depth, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                // heading row for the nested section
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // use the cleaned ID for both the index lookup and the storage query
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: numeric looking IDs must not loosely match another entry
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // the position in the page index times 100 is passed as the page's first chunk ID
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. Each question and
     * answer pair is appended to the history passed along with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // readLine() only returns an empty string when no more input is available.
        // Compare explicitly so that a question like "0" does not end the session.
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->printRunStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the "clear" option, passed on to createNewIndex()
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->printRunStats($start);
    }

    /**
     * Report peak memory usage and elapsed time of a long running task
     *
     * @param int $start timestamp taken with time() when the task began
     * @return void
     */
    private function printRunStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys "answer" and "sources"
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line was entered. When standard input
     * cannot be opened or has ended (EOF), an empty string is returned instead of
     * prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string if no more input is available
     */
    protected function readLine($prompt)
    {
        // open the stream once instead of once per retry
        $stdin = fopen('php://stdin', 'r');
        if ($stdin === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($stdin);
            if ($line === false) {
                // EOF or read error: give up, otherwise this loop would never end
                break;
            }
            $value = trim($line);
        }
        fclose($stdin);

        return $value;
    }
}