xref: /plugin/aichat/cli.php (revision 87e464844e8a4bc0fa03608147b1a25b9b20b678)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\plugin\aichat\Chunk;
5use dokuwiki\Search\Indexer;
6use splitbrain\phpcli\Colors;
7use splitbrain\phpcli\Options;
8use splitbrain\phpcli\TableFormatter;
9
10/**
11 * DokuWiki Plugin aichat (CLI Component)
12 *
13 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
14 * @author  Andreas Gohr <gohr@cosmocode.de>
15 */
16class cli_plugin_aichat extends CLIPlugin
17{
18    /** @var helper_plugin_aichat */
19    protected $helper;
20
21    public function __construct($autocatch = true)
22    {
23        parent::__construct($autocatch);
24        $this->helper = plugin_load('helper', 'aichat');
25        $this->helper->setLogger($this);
26    }
27
28    /** @inheritDoc */
29    protected function setup(Options $options)
30    {
31        $options->useCompactHelp();
32
33        $options->setHelp(
34            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
35            'This may incur costs.'
36        );
37
38        $options->registerCommand(
39            'embed',
40            'Create embeddings for all pages. This skips pages that already have embeddings'
41        );
42        $options->registerOption(
43            'clear',
44            'Clear all existing embeddings before creating new ones',
45            'c',
46            false,
47            'embed'
48        );
49
50        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');
51
52        $options->registerCommand('similar', 'Search for similar pages');
53        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');
54
55        $options->registerCommand('ask', 'Ask a question');
56        $options->registerArgument('question', 'The question to ask', true, 'ask');
57
58        $options->registerCommand('chat', 'Start an interactive chat session');
59
60        $options->registerCommand('models', 'List available models');
61
62        $options->registerCommand('info', 'Get Info about the vector storage and other stats');
63
64        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
65        $options->registerArgument('page', 'The page to split', true, 'split');
66
67        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
68        $options->registerArgument('page', 'The page to check', true, 'page');
69        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');
70
71        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
72            ' Not supported on all storages.');
73        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
74        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
75    }
76
77    /** @inheritDoc */
78    protected function main(Options $options)
79    {
80        $this->loadConfig();
81        ini_set('memory_limit', -1);
82        switch ($options->getCmd()) {
83            case 'embed':
84                $this->createEmbeddings($options->getOpt('clear'));
85                break;
86            case 'maintenance':
87                $this->runMaintenance();
88                break;
89            case 'similar':
90                $this->similar($options->getArgs()[0]);
91                break;
92            case 'ask':
93                $this->ask($options->getArgs()[0]);
94                break;
95            case 'chat':
96                $this->chat();
97                break;
98            case 'models':
99                $this->models();
100                break;
101            case 'split':
102                $this->split($options->getArgs()[0]);
103                break;
104            case 'page':
105                $this->page($options->getArgs()[0], $options->getOpt('dump'));
106                break;
107            case 'info':
108                $this->showinfo();
109                break;
110            case 'tsv':
111                $args = $options->getArgs();
112                $vector = $args[0] ?? 'vector.tsv';
113                $meta = $args[1] ?? 'meta.tsv';
114                $this->tsv($vector, $meta);
115                break;
116            default:
117                echo $options->help();
118        }
119    }
120
121    /**
122     * @return void
123     */
124    protected function showinfo()
125    {
126        $stats = [
127            'model' => $this->getConf('model'),
128        ];
129        $stats = array_merge(
130            $stats,
131            array_map('dformat', $this->helper->getRunData()),
132            $this->helper->getStorage()->statistics()
133        );
134        $this->printTable($stats);
135    }
136
137    /**
138     * Print key value data as tabular data
139     *
140     * @param array $data
141     * @param int $level
142     * @return void
143     */
144    protected function printTable($data, $level = 0)
145    {
146        $tf = new TableFormatter($this->colors);
147        foreach ($data as $key => $value) {
148            if (is_array($value)) {
149                echo $tf->format(
150                    [$level * 2, 20, '*'],
151                    ['', $key, ''],
152                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
153                );
154                $this->printTable($value, $level + 1);
155            } else {
156                echo $tf->format(
157                    [$level * 2, 20, '*'],
158                    ['', $key, $value],
159                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
160                );
161            }
162        }
163    }
164
165    /**
166     * Check chunk availability for a given page
167     *
168     * @param string $page
169     * @return void
170     */
171    protected function page($page, $dump = false)
172    {
173        $indexer = new Indexer();
174        $pages = $indexer->getPages();
175        $pos = array_search(cleanID($page), $pages);
176
177        if ($pos === false) {
178            $this->error('Page not found');
179            return;
180        }
181
182        $storage = $this->helper->getStorage();
183        $chunks = $storage->getPageChunks($page, $pos * 100);
184        if ($chunks) {
185            $this->success('Found ' . count($chunks) . ' chunks');
186            if ($dump) {
187                echo json_encode($chunks, JSON_PRETTY_PRINT);
188            }
189        } else {
190            $this->error('No chunks found');
191        }
192    }
193
194    /**
195     * Split the given page into chunks and print them
196     *
197     * @param string $page
198     * @return void
199     * @throws Exception
200     */
201    protected function split($page)
202    {
203        $text = rawWiki($page);
204        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
205        foreach ($chunks as $chunk) {
206            echo $chunk;
207            echo "\n";
208            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
209        }
210        $this->success('Split into ' . count($chunks) . ' chunks');
211    }
212
213    /**
214     * Interactive Chat Session
215     *
216     * @return void
217     * @throws Exception
218     */
219    protected function chat()
220    {
221        if ($this->loglevel['debug']['enabled']) {
222            $this->helper->getChatModel()->setDebug(true);
223        }
224
225        $history = [];
226        while ($q = $this->readLine('Your Question')) {
227            $this->helper->getChatModel()->resetUsageStats();
228            $result = $this->helper->askChatQuestion($q, $history);
229            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
230            $history[] = [$result['question'], $result['answer']];
231            $this->printAnswer($result);
232        }
233    }
234
235    protected function models()
236    {
237        $result = [
238            'chat' => [],
239            'embedding' => [],
240        ];
241
242
243        $jsons = glob(__DIR__ . '/Model/*/models.json');
244        foreach ($jsons as $json) {
245            $models = json_decode(file_get_contents($json), true);
246            foreach ($models as $type => $model) {
247                $namespace = basename(dirname($json));
248                foreach ($model as $name => $info) {
249
250
251                    $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\' . ucfirst($type) . 'Model';
252                    try {
253                        new $class($name, $this->conf);
254                        $info['confok'] = true;
255                    } catch (Exception $e) {
256                        $info['confok'] = false;
257                    }
258
259                    $result[$type]["$namespace $name"] = $info;
260                }
261            }
262        }
263
264        $td = new TableFormatter($this->colors);
265        $cols = [30, 20, 20, '*'];
266        echo "==== Chat Models ====\n\n";
267        echo $td->format(
268            $cols,
269            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
270            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
271        );
272        foreach ($result['chat'] as $name => $info) {
273            echo $td->format(
274                $cols,
275                [
276                    $name,
277                    sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
278                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['inputTokenPrice']),
279                    $info['description']."\n"
280                ],
281                [
282                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
283                ]
284            );
285        }
286
287        $cols = [30, 10, 10, 10, '*'];
288        echo "==== Embedding Models ====\n\n";
289        echo $td->format(
290            $cols,
291            ['Model', 'Token Limits', 'Price USD/M', 'Dimensions', 'Description'],
292            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
293        );
294        foreach ($result['embedding'] as $name => $info) {
295            echo $td->format(
296                $cols,
297                [
298                    $name,
299                    sprintf("%7d", $info['inputTokens']),
300                    sprintf("%.2f", $info['inputTokenPrice']),
301                    $info['dimensions'],
302                    $info['description']."\n"
303                ],
304                [
305                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
306                ]
307            );
308        }
309
310        $this->colors->ptln('Current prices may differ', Colors::C_RED);
311    }
312
313    /**
314     * Handle a single, standalone question
315     *
316     * @param string $query
317     * @return void
318     * @throws Exception
319     */
320    protected function ask($query)
321    {
322        if ($this->loglevel['debug']['enabled']) {
323            $this->helper->getChatModel()->setDebug(true);
324        }
325
326        $result = $this->helper->askQuestion($query);
327        $this->printAnswer($result);
328    }
329
330    /**
331     * Get the pages that are similar to the query
332     *
333     * @param string $query
334     * @return void
335     */
336    protected function similar($query)
337    {
338        $langlimit = $this->helper->getLanguageLimit();
339        if ($langlimit) {
340            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
341        }
342
343        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
344        $this->printSources($sources);
345    }
346
347    /**
348     * Run the maintenance tasks
349     *
350     * @return void
351     */
352    protected function runMaintenance()
353    {
354        $start = time();
355        $this->helper->getStorage()->runMaintenance();
356        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
357        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
358
359        $data = $this->helper->getRunData();
360        $data['maintenance ran at'] = time();
361        $this->helper->setRunData($data);
362    }
363
364    /**
365     * Recreate chunks and embeddings for all pages
366     *
367     * @return void
368     */
369    protected function createEmbeddings($clear)
370    {
371        [$skipRE, $matchRE] = $this->getRegexps();
372
373        $start = time();
374        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
375        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
376        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
377
378        $data = $this->helper->getRunData();
379        $data['embed ran at'] = time();
380        $this->helper->setRunData($data);
381    }
382
383    /**
384     * Dump TSV files for debugging
385     *
386     * @return void
387     */
388    protected function tsv($vector, $meta)
389    {
390
391        $storage = $this->helper->getStorage();
392        $storage->dumpTSV($vector, $meta);
393        $this->success('written to ' . $vector . ' and ' . $meta);
394    }
395
396    /**
397     * Print the given detailed answer in a nice way
398     *
399     * @param array $answer
400     * @return void
401     */
402    protected function printAnswer($answer)
403    {
404        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
405        echo "\n";
406        $this->printSources($answer['sources']);
407        echo "\n";
408        $this->printUsage();
409    }
410
411    /**
412     * Print the given sources
413     *
414     * @param Chunk[] $sources
415     * @return void
416     */
417    protected function printSources($sources)
418    {
419        foreach ($sources as $source) {
420            /** @var Chunk $source */
421            $this->colors->ptln(
422                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
423                Colors::C_LIGHTBLUE
424            );
425        }
426    }
427
428    /**
429     * Print the usage statistics for OpenAI
430     *
431     * @return void
432     */
433    protected function printUsage()
434    {
435        $this->info(
436            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
437            $this->helper->getChatModel()->getUsageStats()
438        );
439    }
440
441    /**
442     * Interactively ask for a value from the user
443     *
444     * @param string $prompt
445     * @return string
446     */
447    protected function readLine($prompt)
448    {
449        $value = '';
450
451        while ($value === '') {
452            echo $prompt;
453            echo ': ';
454
455            $fh = fopen('php://stdin', 'r');
456            $value = trim(fgets($fh));
457            fclose($fh);
458        }
459
460        return $value;
461    }
462
463    /**
464     * Read the skip and match regex from the config
465     *
466     * Ensures the regular expressions are valid
467     *
468     * @return string[] [$skipRE, $matchRE]
469     */
470    protected function getRegexps()
471    {
472        $skip = $this->getConf('skipRegex');
473        $skipRE = '';
474        $match = $this->getConf('matchRegex');
475        $matchRE = '';
476
477        if ($skip) {
478            $skipRE = '/' . $skip . '/';
479            if (@preg_match($skipRE, '') === false) {
480                $this->error(preg_last_error_msg());
481                $this->error('Invalid regular expression in $conf[\'skipRegex\']. Ignored.');
482                $skipRE = '';
483            } else {
484                $this->success('Skipping pages matching ' . $skipRE);
485            }
486        }
487
488        if ($match) {
489            $matchRE = '/' . $match . '/';
490            if (@preg_match($matchRE, '') === false) {
491                $this->error(preg_last_error_msg());
492                $this->error('Invalid regular expression in $conf[\'matchRegex\']. Ignored.');
493                $matchRE = '';
494            } else {
495                $this->success('Only indexing pages matching ' . $matchRE);
496            }
497        }
498        return [$skipRE, $matchRE];
499    }
500}
501