<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\Search\Indexer;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;


/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat helper: creates embeddings, looks up
 * similar chunks, asks single questions, runs an interactive chat and offers
 * a few debugging commands.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the helper plugin and register this CLI as logger for the embeddings
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c', false, 'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // every command that reads getArgs()[0] registered that argument as required in setup()
        switch ($options->getCmd()) {

            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the vector storage
     *
     * @return void
     */
    protected function showinfo()
    {
        echo 'model: ' . $this->getConf('model') . "\n";
        $stats = $this->helper->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }

        //echo $this->helper->getModel()->listUpstreamModels();
    }

    /**
     * Check chunk availability for a given page
     *
     * The page is looked up in the search index by its cleaned ID; its position
     * in the index is used to derive the first chunk ID to ask the storage for.
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: loose matching would treat numeric-looking IDs like "1e3" and "1000" as equal
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): presumably chunk IDs are allocated in blocks of 100 per index position - confirm
        // The cleaned ID is used here as well so the storage lookup matches the index lookup above.
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until standard input is exhausted (e.g. Ctrl-D).
     * Each interpreted question and its answer are appended to the history that
     * is passed along with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a plain truthiness check would end the session on the input "0"
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages in playground or sandbox namespaces are skipped.
     *
     * @param bool $clear value of the --clear option; handed to createNewIndex()
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        ini_set('memory_limit', '-1'); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, the sources it was based on and the usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * One line per chunk: page, chunk id and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics of the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line was entered. When standard input
     * cannot be opened or is exhausted (EOF), an empty string is returned instead
     * of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string on EOF
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        $value = '';
        try {
            while ($value === '') {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // EOF or read error: fgets() will keep failing, so stop asking
                    echo "\n";
                    break;
                }
                $value = trim($line);
            }
        } finally {
            fclose($fh);
        }

        return $value;
    }
}