xref: /plugin/aichat/cli.php (revision c2b7a1f7fd0f6c6579c9ee46f0437ff89c2fc4b3)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\plugin\aichat\Chunk;
5use dokuwiki\plugin\aichat\ModelFactory;
6use dokuwiki\Search\Indexer;
7use splitbrain\phpcli\Colors;
8use splitbrain\phpcli\Options;
9use splitbrain\phpcli\TableFormatter;
10
/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end of the plugin: creates embeddings, runs storage maintenance,
 * queries the configured models and offers a few debugging commands.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /** @inheritdoc */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        // let the helper report its progress through this CLI's logging methods
        $this->helper->setLogger($this);
        $this->loadConfig();
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('models', 'List available models');

        $options->registerCommand('info', 'Get Info about the vector storage and other stats');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');
        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        if ($this->loglevel['debug']['enabled']) {
            $this->helper->factory->setDebug(true);
        }

        // lift the memory limit for this CLI run
        ini_set('memory_limit', '-1');

        $args = $options->getArgs();
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($args[0]);
                break;
            case 'ask':
                $this->ask($args[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'models':
                $this->models();
                break;
            case 'split':
                $this->split($args[0]);
                break;
            case 'page':
                $this->page($args[0], $options->getOpt('dump'));
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional and fall back to defaults in the current directory
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured models, the recorded run data and the storage statistics
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'chat model' => $this->getConf('chatmodel'),
            'embed model' => $this->getConf('embedmodel'),
        ];
        $stats = array_merge(
            $stats,
            // run data values are passed through dformat() for display
            array_map('dformat', $this->helper->getRunData()),
            $this->helper->getStorage()->statistics()
        );
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively, indented by two columns per level.
     *
     * @param array $data
     * @param int $level current nesting depth
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                // the key becomes a heading line, its children follow one level deeper
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 20, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @param bool $dump print the found chunks as JSON
     * @return void
     */
    protected function page($page, $dump = false)
    {
        // use the same cleaned ID for the index lookup and for the storage lookup
        $id = cleanID($page);

        $indexer = new Indexer();
        $pos = array_search($id, $indexer->getPages(), true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        // NOTE(review): the first chunk ID is presumably derived from the page's index position
        // times 100 - confirm against the storage implementation
        $chunks = $this->helper->getStorage()->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
            if ($dump) {
                echo json_encode($chunks, JSON_PRETTY_PRINT);
            }
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends. Each rephrased question and its
     * answer are appended to the history that is passed along with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // readLine() only returns an empty string when the input stream has ended
        while (($q = $this->readLine('Your Question')) !== '') {
            // usage is reported per question, so start each round from zero
            $this->helper->getChatModel()->resetUsageStats();
            $this->helper->getRephraseModel()->resetUsageStats();
            $this->helper->getEmbeddingModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Print information about the available models
     *
     * @return void
     */
    protected function models()
    {
        $result = (new ModelFactory($this->conf))->getModels();

        $td = new TableFormatter($this->colors);
        $cols = [30, 20, 20, '*'];
        echo "==== Chat Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['chat'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
                    $info['description'] . "\n"
                ],
                [
                    // the model name is green when $info['instance'] is set, red otherwise
                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $cols = [30, 10, 10, 10, '*'];
        echo "==== Embedding Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Dimensions', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['embedding'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf("%7d", $info['inputTokens']),
                    sprintf("%.2f", $info['inputTokenPrice']),
                    $info['dimensions'],
                    $info['description'] . "\n"
                ],
                [
                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $this->colors->ptln('Current prices may differ', Colors::C_RED);
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->recordRun($start, 'maintenance ran at');
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear clear all existing embeddings before creating new ones
     * @return void
     */
    protected function createEmbeddings($clear)
    {
        [$skipRE, $matchRE] = $this->getRegexps();

        $start = time();
        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
        $this->recordRun($start, 'embed ran at');
    }

    /**
     * Report resource usage of a finished task and remember when it ran
     *
     * @param int $start unix timestamp taken before the task started
     * @param string $key the run data key that receives the current timestamp
     * @return void
     */
    protected function recordRun($start, $key)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data[$key] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector file name passed to the storage for the vector data
     * @param string $meta file name passed to the storage for the meta data
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, then the sources, then the usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the combined usage statistics of the chat, rephrase and embedding models
     *
     * @return void
     */
    protected function printUsage()
    {
        $chat = $this->helper->getChatModel()->getUsageStats();
        $rephrase = $this->helper->getRephraseModel()->getUsageStats();
        $embed = $this->helper->getEmbeddingModel()->getUsageStats();

        // every figure is the sum over all three models
        $this->info(
            'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',
            [
                'requests' => $chat['requests'] + $rephrase['requests'] + $embed['requests'],
                'time' => $chat['time'] + $rephrase['time'] + $embed['time'],
                'tokens' => $chat['tokens'] + $rephrase['tokens'] + $embed['tokens'],
                'cost' => $chat['cost'] + $rephrase['cost'] + $embed['cost'],
            ]
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line was entered.
     *
     * @param string $prompt
     * @return string the trimmed input, or an empty string when the input stream has ended
     */
    protected function readLine($prompt)
    {
        $value = '';

        while ($value === '') {
            echo $prompt;
            echo ': ';

            // read from the shared STDIN handle: re-opening php://stdin for every prompt
            // can discard input that was already buffered, e.g. when questions are piped in
            $line = fgets(STDIN);
            if ($line === false) {
                // end of input (Ctrl-D or exhausted pipe): stop instead of prompting forever
                echo "\n";
                return '';
            }
            $value = trim($line);
        }

        return $value;
    }

    /**
     * Read the skip and match regex from the config
     *
     * Ensures the regular expressions are valid
     *
     * @return string[] [$skipRE, $matchRE]
     */
    protected function getRegexps()
    {
        return [
            $this->loadRegexpSetting('skipRegex', 'Skipping pages matching '),
            $this->loadRegexpSetting('matchRegex', 'Only indexing pages matching '),
        ];
    }

    /**
     * Turn a regex config setting into a delimited, validated regular expression
     *
     * @param string $setting name of the config setting holding the bare pattern
     * @param string $notice message prefix printed in front of a valid expression
     * @return string the expression wrapped in '/' delimiters, empty when unset or invalid
     */
    protected function loadRegexpSetting($setting, $notice)
    {
        $pattern = $this->getConf($setting);
        if (!$pattern) {
            return '';
        }

        $regexp = '/' . $pattern . '/';
        // a test match returns false for a broken pattern; the warning itself is suppressed
        if (@preg_match($regexp, '') === false) {
            $this->error(preg_last_error_msg());
            $this->error('Invalid regular expression in $conf[\'' . $setting . '\']. Ignored.');
            return '';
        }

        $this->success($notice . $regexp);
        return $regexp;
    }
}
491