<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end to the aichat helper: builds embeddings, runs
 * similarity searches, asks single questions or starts an interactive chat,
 * and offers a few debugging commands.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as the logger of its embeddings component
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                // no (known) command given: show the help screen
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the vector storage
     *
     * @return void
     */
    protected function showinfo()
    {
        echo 'model: ' . $this->getConf('model') . "\n";
        $stats = $this->helper->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * Looks the page up in the search index and then asks the storage for its chunks.
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // use the same normalized ID for the index lookup and the storage lookup
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: numeric looking IDs such as "1e3" and "1000" must not be treated as equal
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): presumably chunk IDs are allocated in blocks of 100 per index
        // position, making $pos * 100 the page's first chunk ID - confirm against the storage
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends. Each interpreted question
     * and its answer are appended to the history passed to the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a truthiness check would end the chat on the input "0"
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages in playground or sandbox namespaces are skipped.
     *
     * @param bool $clear value of the 'clear' option, passed on to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        ini_set('memory_limit', '-1'); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics of the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line was entered. When standard input
     * cannot be opened or has ended (EOF), an empty string is returned instead of
     * prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string if no more input is available
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        try {
            while ($value === '') {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // EOF or read error: nothing more will ever arrive, stop asking
                    break;
                }
                $value = trim($line);
            }
        } finally {
            fclose($fh);
        }

        return $value;
    }
}