<?php

use dokuwiki\plugin\aichat\backend\Chunk;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;

/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Registers the sub commands embed, similar, ask, chat, split and info and
 * dispatches them to the aichat helper plugin.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends \dokuwiki\Extension\CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as the logger of its embeddings component
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'info':
                $this->treeinfo();
                break;
            default:
                // no (known) sub command given
                echo $options->help();
        }
    }

    /**
     * Print the statistics reported by the vector storage, one "key: value" pair per line
     *
     * @return void
     */
    protected function treeinfo()
    {
        $stats = $this->helper->getEmbeddings()->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * Each chunk is followed by a separator line; the number of chunks is reported at the end.
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. From the second question on,
     * the question is first rephrased using the previous question/answer pairs.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a plain truthiness check would end the session on the input "0"
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getOpenAI()->resetUsageStats();
            if ($history) {
                $question = $this->helper->rephraseChatQuestion($q, $history);
                $this->colors->ptln("Interpretation: $question", Colors::C_LIGHTPURPLE);
            } else {
                $question = $q;
            }
            $result = $this->helper->askQuestion($question);
            // the history stores the question as typed, not the rephrased one
            $history[] = [$q, $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * Prints the page of every chunk returned for the query.
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        foreach ($sources as $source) {
            $this->colors->ptln($source->getPage(), Colors::C_LIGHTBLUE);
        }
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the --clear option, handed on to createNewIndex()
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        ini_set('memory_limit', '-1'); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, then the page of each source, then the usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        foreach ($answer['sources'] as $source) {
            /** @var Chunk $source */
            $this->colors->ptln("\t" . $source->getPage(), Colors::C_LIGHTBLUE);
        }
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the usage statistics for OpenAI
     *
     * NOTE(review): assumes getUsageStats() returns the keys requests, time, tokens and cost
     * used as placeholders in the message - confirm against the helper.
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to OpenAI. Used {tokens} tokens for about ${cost}.',
            $this->helper->getOpenAI()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated as long as only empty (whitespace) lines are entered.
     *
     * @param string $prompt
     * @return string the trimmed input, or an empty string when STDIN can not be read (EOF/error)
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        try {
            do {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // EOF (e.g. Ctrl-D or exhausted pipe) or read error: without this
                    // check the prompt would be printed in an endless loop
                    return '';
                }
                $value = trim($line);
            } while ($value === '');
        } finally {
            fclose($fh);
        }

        return $value;
    }
}