<?php

use dokuwiki\plugin\aichat\backend\Chunk;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. It offers sub commands to
 * build the embeddings index, look up similar chunks, ask single questions,
 * run an interactive chat, split a page into chunks and print storage
 * statistics.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends \dokuwiki\Extension\CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as logger for the embeddings
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /**
     * Register the help text, sub commands, options and arguments
     *
     * @inheritDoc
     */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /**
     * Dispatch the selected sub command; print the help when none matches
     *
     * @inheritDoc
     */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'info':
                $this->treeinfo();
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the statistics reported by the embeddings storage, one "key: value" per line
     *
     * @return void
     */
    protected function treeinfo()
    {
        $stats = $this->helper->getEmbeddings()->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends (EOF). From the second
     * question on, the question is first rephrased using the chat history.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: an input like "0" is a valid question,
        // only the empty string returned on EOF ends the session
        while (($q = $this->readLine('Your Question')) !== '') {
            // reset before each question; the usage is printed again after each answer
            $this->helper->getOpenAI()->resetUsageStats();
            if ($history) {
                $question = $this->helper->rephraseChatQuestion($q, $history);
                $this->colors->ptln("Interpretation: $question", Colors::C_LIGHTPURPLE);
            } else {
                $question = $q;
            }
            $result = $this->helper->askQuestion($question);
            $history[] = [$q, $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        foreach ($sources as $source) {
            $this->colors->ptln($source->getPage(), Colors::C_LIGHTBLUE);
        }
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the "clear" option, handed on to createNewIndex()
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        ini_set('memory_limit', '-1'); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, then the page of each source, then the usage statistics.
     *
     * @param array $answer uses the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        foreach ($answer['sources'] as $source) {
            /** @var Chunk $source */
            $this->colors->ptln("\t" . $source->getPage(), Colors::C_LIGHTBLUE);
        }
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the usage statistics for OpenAI
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to OpenAI. Used {tokens} tokens for about ${cost}.',
            $this->helper->getOpenAI()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line was entered. When the input
     * ends (EOF, e.g. Ctrl-D or a closed pipe) or stdin can not be opened, an
     * empty string is returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string if no more input is available
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // EOF or read error: finish the prompt line and give up
                echo "\n";
                break;
            }
            $value = trim($line);
        }
        fclose($fh);

        return $value;
    }
}