xref: /plugin/aichat/cli.php (revision 0de7e020fcc340c97acd36e48cdb20a9d43528b6)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\plugin\aichat\AbstractCLI;
5use dokuwiki\plugin\aichat\Chunk;
6use dokuwiki\plugin\aichat\ModelFactory;
7use dokuwiki\Search\Indexer;
8use splitbrain\phpcli\Colors;
9use splitbrain\phpcli\Options;
10use splitbrain\phpcli\TableFormatter;
11
12/**
13 * DokuWiki Plugin aichat (CLI Component)
14 *
15 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
16 * @author  Andreas Gohr <gohr@cosmocode.de>
17 */
class cli_plugin_aichat extends AbstractCLI
{
    /**
     * Plugin helper giving access to models, embeddings, storage and run data.
     * It is not assigned anywhere in this class; presumably the parent class
     * initializes it -- confirm against AbstractCLI.
     *
     * @var helper_plugin_aichat
     */
    protected $helper;

    /**
     * Register the help text, the global --model option and all sub commands
     *
     * @inheritDoc
     */
    protected function setup(Options $options)
    {
        parent::setup($options);

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        // global option, available for every command
        $options->registerOption(
            'model',
            'Overrides the chat and rephrasing model settings and uses this model instead',
            '',
            'model'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('models', 'List available models');

        $options->registerCommand('info', 'Get Info about the vector storage and other stats');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');
        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /**
     * Apply the optional model override and dispatch to the selected command
     *
     * Prints the help screen when no (or an unknown) command was given.
     *
     * @inheritDoc
     */
    protected function main(Options $options)
    {
        parent::main($options);

        $model = $options->getOpt('model');
        if ($model) {
            // The same model is used for chatting and for rephrasing.
            // NOTE(review): the second key used to be spelled 'rephasemodel', which
            // looks like a typo given the option description and getRephraseModel();
            // confirm the key name against the plugin's configuration.
            $this->helper->updateConfig(
                ['chatmodel' => $model, 'rephrasemodel' => $model]
            );
        }

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'models':
                $this->models();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0], $options->getOpt('dump'));
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured models, the recorded run data and the storage statistics
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'chat model' => $this->getConf('chatmodel'),
            'embed model' => $this->getConf('embedmodel'),
        ];
        $stats = array_merge(
            $stats,
            // every run data value is passed through dformat();
            // the values written by this class are time() timestamps
            array_map('dformat', $this->helper->getRunData()),
            $this->helper->getStorage()->statistics()
        );
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively, indented by two columns per level.
     *
     * @param array $data key => value pairs, values may be arrays themselves
     * @param int $level current nesting depth, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                // print the key as a heading, then the nested values below it
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page the page ID to look up
     * @param bool $dump when truthy, the found chunks are printed as pretty printed JSON
     * @return void
     */
    protected function page($page, $dump = false)
    {
        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: page IDs are strings and numeric looking IDs must not match loosely
        $pos = array_search(cleanID($page), $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): presumably chunk IDs start at the page's index position * 100 -- confirm against the storage
        $chunks = $storage->getPageChunks($page, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
            if ($dump) {
                echo json_encode($chunks, JSON_PRETTY_PRINT);
            }
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * Each chunk is followed by a separator line, the number of chunks is reported at the end.
     *
     * @param string $page the page ID whose raw wiki text is split
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. The interpreted question
     * and the answer of each round are passed on as history to the following rounds.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // readLine() only returns an empty string when no more input is available.
        // Comparing strictly ensures a question like "0" does not end the session.
        while (($q = $this->readLine('Your Question')) !== '') {
            // usage is reported per question, so start counting from zero each round
            $this->helper->getChatModel()->resetUsageStats();
            $this->helper->getRephraseModel()->resetUsageStats();
            $this->helper->getEmbeddingModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Print information about the available models
     *
     * Prints one table for chat models and one for embedding models. Rows are green
     * when the model's 'instance' entry is truthy and red otherwise.
     * NOTE(review): 'instance' presumably tells whether the model could be set up with
     * the current configuration -- confirm against ModelFactory.
     *
     * @return void
     */
    protected function models()
    {
        $result = (new ModelFactory($this->conf))->getModels();

        $td = new TableFormatter($this->colors);
        $cols = [30, 20, 20, '*'];
        echo "==== Chat Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['chat'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
                    $info['description'] . "\n"
                ],
                [
                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $cols = [30, 10, 10, 10, '*'];
        echo "==== Embedding Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Dimensions', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['embedding'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf("%7d", $info['inputTokens']),
                    sprintf("%.2f", $info['inputTokenPrice']),
                    $info['dimensions'],
                    $info['description'] . "\n"
                ],
                [
                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $this->colors->ptln('Current prices may differ', Colors::C_RED);
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query the question to ask
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * When the helper reports a language limit, it is announced and passed on to the lookup.
     *
     * @param string $query the text to find similar chunks for
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * Reports memory and time usage and stores the time of this run in the run data.
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->recordRunStats($start, 'maintenance ran at');
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages are filtered by the configured skip and match regular expressions.
     * Reports memory and time usage and stores the time of this run in the run data.
     *
     * @param bool $clear value of the --clear option, passed on to the index creation
     * @return void
     */
    protected function createEmbeddings($clear)
    {
        [$skipRE, $matchRE] = $this->getRegexps();

        $start = time();
        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
        $this->recordRunStats($start, 'embed ran at');
    }

    /**
     * Report peak memory and elapsed time of a task and remember when it finished
     *
     * @param int $start timestamp (as returned by time()) at which the task was started
     * @param string $runDataKey the run data key to store the current time under
     * @return void
     */
    protected function recordRunStats($start, $runDataKey)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data[$runDataKey] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector file name passed to the storage for the vector data
     * @param string $meta file name passed to the storage for the meta data
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, the sources it is based on and the model usage.
     *
     * @param array $answer needs the keys 'answer' (string) and 'sources' (Chunk[])
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the combined usage statistics of the chat, rephrase and embedding models
     *
     * @return void
     */
    protected function printUsage()
    {
        $chat = $this->helper->getChatModel()->getUsageStats();
        $rephrase = $this->helper->getRephraseModel()->getUsageStats();
        $embed = $this->helper->getEmbeddingModel()->getUsageStats();

        // every figure is the sum over all three models
        $this->info(
            'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',
            [
                'requests' => $chat['requests'] + $rephrase['requests'] + $embed['requests'],
                'time' => $chat['time'] + $rephrase['time'] + $embed['time'],
                'tokens' => $chat['tokens'] + $rephrase['tokens'] + $embed['tokens'],
                'cost' => $chat['cost'] + $rephrase['cost'] + $embed['cost'],
            ]
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line was entered.
     *
     * @param string $prompt text shown in front of the input, a colon is appended
     * @return string the trimmed input; an empty string only when STDIN could not be
     *                opened or no more input is available (end of input)
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        try {
            do {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // end of input (or read error): asking again would loop forever
                    return '';
                }
                $value = trim($line);
            } while ($value === '');
        } finally {
            fclose($fh);
        }

        return $value;
    }

    /**
     * Read the skip and match regex from the config
     *
     * Ensures the regular expressions are valid. Invalid or unset expressions
     * result in an empty string.
     *
     * @return string[] [$skipRE, $matchRE]
     */
    protected function getRegexps()
    {
        $skipRE = $this->buildRegexp('skipRegex', 'Skipping pages matching ');
        $matchRE = $this->buildRegexp('matchRegex', 'Only indexing pages matching ');

        return [$skipRE, $matchRE];
    }

    /**
     * Wrap the given config setting in delimiters and verify that it compiles
     *
     * @param string $confKey name of the config setting holding the expression
     * @param string $successPrefix message printed in front of a valid expression
     * @return string the delimited expression, empty when unset or invalid
     */
    protected function buildRegexp($confKey, $successPrefix)
    {
        $pattern = $this->getConf($confKey);
        if (!$pattern) {
            return '';
        }

        $regexp = '/' . $pattern . '/';
        // a test match against an empty subject returns false when the expression does
        // not compile; the resulting warning is suppressed and reported via error() instead
        if (@preg_match($regexp, '') === false) {
            $this->error(preg_last_error_msg());
            $this->error('Invalid regular expression in $conf[\'' . $confKey . '\']. Ignored.');
            return '';
        }

        $this->success($successPrefix . $regexp);
        return $regexp;
    }
}
494