<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;


/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. It offers sub commands to build
 * the embedding index, look up similar chunks, ask single questions, run an
 * interactive chat, split a page into chunks and print storage information.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as logger of the embeddings
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c', false, 'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {

            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                // no (known) command given: show the help screen
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        echo 'model: ' . $this->getConf('model') . "\n";
        $stats = $this->helper->getEmbeddings()->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }

        //echo $this->helper->getModel()->listUpstreamModels();
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until standard input is closed. Each question
     * and answer pair is appended to the history passed to the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a plain truthiness check would end the
        // session on the valid input "0"; '' is only returned on end of input
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the "clear" option, handed on to createNewIndex()
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        ini_set('memory_limit', '-1'); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered. When
     * standard input can not be opened or has reached its end (e.g. Ctrl-D or
     * exhausted piped input) an empty string is returned instead of asking
     * forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string when no more input is available
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // end of input or read error: trim(false) would yield '' and
                // keep this loop spinning forever, so give up instead
                break;
            }
            $value = trim($line);
        }
        fclose($fh);

        return $value;
    }
}