<?php

use dokuwiki\plugin\aichat\backend\Chunk;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;


/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat helper: builds the embedding index,
 * inspects the vector storage and lets the user query the chatbot.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends \dokuwiki\Extension\CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as logger of its embeddings component
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }


    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp('Manage and query the AI chatbot data');

        $options->registerCommand('embed', 'Create embeddings for all pages');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // dispatch to the handler of the selected sub command, print the help when none matches
        switch ($options->getCmd()) {

            case 'embed':
                $this->createEmbeddings();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'info':
                $this->treeinfo();
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the statistics reported by the vector storage, one "key: value" pair per line
     *
     * @return void
     */
    protected function treeinfo()
    {
        $stats = $this->helper->getEmbeddings()->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * Each chunk is followed by a separator line, the total number of chunks is reported at the end.
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends. From the second question on, the
     * question is rephrased using the previous questions and answers before it is asked.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // readLine() only returns an empty string when the input ended. Comparing against ''
        // instead of testing truthiness keeps a question like "0" from ending the session.
        while (($q = $this->readLine('Your Question')) !== '') {
            if ($history) {
                $question = $this->helper->rephraseChatQuestion($q, $history);
                $this->colors->ptln("Interpretation: $question", Colors::C_LIGHTPURPLE);
            } else {
                $question = $q;
            }
            $result = $this->helper->askQuestion($question);
            // the history stores the question as typed, not the rephrased one
            $history[] = [$q, $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * The answer text is printed first, followed by the page of each source chunk.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        foreach ($answer['sources'] as $source) {
            /** @var Chunk $source */
            $this->colors->ptln("\t" . $source->getPage(), Colors::C_LIGHTBLUE);
        }
        echo "\n";
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * Prints the page of every chunk returned for the query.
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        foreach ($sources as $source) {
            $this->colors->ptln($source->getPage(), Colors::C_LIGHTBLUE);
        }
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Reports the peak memory usage when done.
     *
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings()
    {
        ini_set('memory_limit', -1); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/');
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered. When the input ends
     * (EOF, e.g. Ctrl-D or exhausted piped input) or can not be read, an empty string is
     * returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string when no more input is available
     */
    protected function readLine($prompt)
    {
        $value = '';

        while ($value === '') {
            echo $prompt;
            echo ': ';

            $fh = fopen('php://stdin', 'r');
            if ($fh === false) {
                return '';
            }
            $line = fgets($fh);
            fclose($fh);

            // fgets() signals EOF with false; trimming that would yield '' and loop endlessly
            if ($line === false) {
                return '';
            }

            $value = trim($line);
        }

        return $value;
    }
}
