xref: /plugin/aichat/cli.php (revision 34a1c47875552330ce367360d99f2c3f9f69af94)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\plugin\aichat\Chunk;
5use dokuwiki\Search\Indexer;
6use splitbrain\phpcli\Colors;
7use splitbrain\phpcli\Options;
8use splitbrain\phpcli\TableFormatter;
9
10/**
11 * DokuWiki Plugin aichat (CLI Component)
12 *
13 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
14 * @author  Andreas Gohr <gohr@cosmocode.de>
15 */
16class cli_plugin_aichat extends CLIPlugin
17{
18    /** @var helper_plugin_aichat */
19    protected $helper;
20
21    public function __construct($autocatch = true)
22    {
23        parent::__construct($autocatch);
24        $this->helper = plugin_load('helper', 'aichat');
25        $this->helper->setLogger($this);
26    }
27
28    /** @inheritDoc */
29    protected function setup(Options $options)
30    {
31        $options->useCompactHelp();
32
33        $options->setHelp(
34            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
35            'This may incur costs.'
36        );
37
38        $options->registerCommand(
39            'embed',
40            'Create embeddings for all pages. This skips pages that already have embeddings'
41        );
42        $options->registerOption(
43            'clear',
44            'Clear all existing embeddings before creating new ones',
45            'c',
46            false,
47            'embed'
48        );
49
50        $options->registerCommand('maintenance', 'Run storage maintenance. Refert to the documentation for details.');
51
52        $options->registerCommand('similar', 'Search for similar pages');
53        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');
54
55        $options->registerCommand('ask', 'Ask a question');
56        $options->registerArgument('question', 'The question to ask', true, 'ask');
57
58        $options->registerCommand('chat', 'Start an interactive chat session');
59
60        $options->registerCommand('info', 'Get Info about the vector storage and other stats');
61
62        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
63        $options->registerArgument('page', 'The page to split', true, 'split');
64
65        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
66        $options->registerArgument('page', 'The page to check', true, 'page');
67        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');
68
69        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
70            ' Not supported on all storages.');
71        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
72        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
73    }
74
75    /** @inheritDoc */
76    protected function main(Options $options)
77    {
78        ini_set('memory_limit', -1);
79        switch ($options->getCmd()) {
80            case 'embed':
81                $this->createEmbeddings($options->getOpt('clear'));
82                break;
83            case 'maintenance':
84                $this->runMaintenance();
85                break;
86            case 'similar':
87                $this->similar($options->getArgs()[0]);
88                break;
89            case 'ask':
90                $this->ask($options->getArgs()[0]);
91                break;
92            case 'chat':
93                $this->chat();
94                break;
95            case 'split':
96                $this->split($options->getArgs()[0]);
97                break;
98            case 'page':
99                $this->page($options->getArgs()[0], $options->getOpt('dump'));
100                break;
101            case 'info':
102                $this->showinfo();
103                break;
104            case 'tsv':
105                $args = $options->getArgs();
106                $vector = $args[0] ?? 'vector.tsv';
107                $meta = $args[1] ?? 'meta.tsv';
108                $this->tsv($vector, $meta);
109                break;
110            default:
111                echo $options->help();
112        }
113    }
114
115    /**
116     * @return void
117     */
118    protected function showinfo()
119    {
120        $stats = [
121            'model' => $this->getConf('model'),
122        ];
123        $stats = array_merge(
124            $stats,
125            array_map('dformat', $this->helper->getRunData()),
126            $this->helper->getStorage()->statistics()
127        );
128        $this->printTable($stats);
129    }
130
131    /**
132     * Print key value data as tabular data
133     *
134     * @param array $data
135     * @param int $level
136     * @return void
137     */
138    protected function printTable($data, $level = 0)
139    {
140        $tf = new TableFormatter($this->colors);
141        foreach ($data as $key => $value) {
142            if (is_array($value)) {
143                echo $tf->format(
144                    [$level * 2, 20, '*'],
145                    ['', $key, ''],
146                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
147                );
148                $this->printTable($value, $level + 1);
149            } else {
150                echo $tf->format(
151                    [$level * 2, 20, '*'],
152                    ['', $key, $value],
153                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
154                );
155            }
156        }
157    }
158
159    /**
160     * Check chunk availability for a given page
161     *
162     * @param string $page
163     * @return void
164     */
165    protected function page($page, $dump = false)
166    {
167        $indexer = new Indexer();
168        $pages = $indexer->getPages();
169        $pos = array_search(cleanID($page), $pages);
170
171        if ($pos === false) {
172            $this->error('Page not found');
173            return;
174        }
175
176        $storage = $this->helper->getStorage();
177        $chunks = $storage->getPageChunks($page, $pos * 100);
178        if ($chunks) {
179            $this->success('Found ' . count($chunks) . ' chunks');
180            if ($dump) {
181                echo json_encode($chunks, JSON_PRETTY_PRINT);
182            }
183        } else {
184            $this->error('No chunks found');
185        }
186    }
187
188    /**
189     * Split the given page into chunks and print them
190     *
191     * @param string $page
192     * @return void
193     * @throws Exception
194     */
195    protected function split($page)
196    {
197        $text = rawWiki($page);
198        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
199        foreach ($chunks as $chunk) {
200            echo $chunk;
201            echo "\n";
202            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
203        }
204        $this->success('Split into ' . count($chunks) . ' chunks');
205    }
206
207    /**
208     * Interactive Chat Session
209     *
210     * @return void
211     * @throws Exception
212     */
213    protected function chat()
214    {
215        if($this->loglevel['debug']['enabled']) {
216            $this->helper->getChatModel()->setDebug(true);
217        }
218
219        $history = [];
220        while ($q = $this->readLine('Your Question')) {
221            $this->helper->getChatModel()->resetUsageStats();
222            $result = $this->helper->askChatQuestion($q, $history);
223            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
224            $history[] = [$result['question'], $result['answer']];
225            $this->printAnswer($result);
226        }
227    }
228
229    /**
230     * Handle a single, standalone question
231     *
232     * @param string $query
233     * @return void
234     * @throws Exception
235     */
236    protected function ask($query)
237    {
238        if($this->loglevel['debug']['enabled']) {
239            $this->helper->getChatModel()->setDebug(true);
240        }
241
242        $result = $this->helper->askQuestion($query);
243        $this->printAnswer($result);
244    }
245
246    /**
247     * Get the pages that are similar to the query
248     *
249     * @param string $query
250     * @return void
251     */
252    protected function similar($query)
253    {
254        $langlimit = $this->helper->getLanguageLimit();
255        if ($langlimit) {
256            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
257        }
258
259        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
260        $this->printSources($sources);
261    }
262
263    /**
264     * Run the maintenance tasks
265     *
266     * @return void
267     */
268    protected function runMaintenance()
269    {
270        $start = time();
271        $this->helper->getStorage()->runMaintenance();
272        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
273        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
274
275        $data = $this->helper->getRunData();
276        $data['maintenance ran at'] = time();
277        $this->helper->setRunData($data);
278    }
279
280    /**
281     * Recreate chunks and embeddings for all pages
282     *
283     * @return void
284     */
285    protected function createEmbeddings($clear)
286    {
287        [$skipRE, $matchRE] = $this->getRegexps();
288
289        $start = time();
290        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
291        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
292        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
293
294        $data = $this->helper->getRunData();
295        $data['embed ran at'] = time();
296        $this->helper->setRunData($data);
297    }
298
299    /**
300     * Dump TSV files for debugging
301     *
302     * @return void
303     */
304    protected function tsv($vector, $meta)
305    {
306
307        $storage = $this->helper->getStorage();
308        $storage->dumpTSV($vector, $meta);
309        $this->success('written to ' . $vector . ' and ' . $meta);
310    }
311
312    /**
313     * Print the given detailed answer in a nice way
314     *
315     * @param array $answer
316     * @return void
317     */
318    protected function printAnswer($answer)
319    {
320        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
321        echo "\n";
322        $this->printSources($answer['sources']);
323        echo "\n";
324        $this->printUsage();
325    }
326
327    /**
328     * Print the given sources
329     *
330     * @param Chunk[] $sources
331     * @return void
332     */
333    protected function printSources($sources)
334    {
335        foreach ($sources as $source) {
336            /** @var Chunk $source */
337            $this->colors->ptln(
338                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
339                Colors::C_LIGHTBLUE
340            );
341        }
342    }
343
344    /**
345     * Print the usage statistics for OpenAI
346     *
347     * @return void
348     */
349    protected function printUsage()
350    {
351        $this->info(
352            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
353            $this->helper->getChatModel()->getUsageStats()
354        );
355    }
356
357    /**
358     * Interactively ask for a value from the user
359     *
360     * @param string $prompt
361     * @return string
362     */
363    protected function readLine($prompt)
364    {
365        $value = '';
366
367        while ($value === '') {
368            echo $prompt;
369            echo ': ';
370
371            $fh = fopen('php://stdin', 'r');
372            $value = trim(fgets($fh));
373            fclose($fh);
374        }
375
376        return $value;
377    }
378
379    /**
380     * Read the skip and match regex from the config
381     *
382     * Ensures the regular expressions are valid
383     *
384     * @return string[] [$skipRE, $matchRE]
385     */
386    protected function getRegexps()
387    {
388        $skip = $this->getConf('skipRegex');
389        $skipRE = '';
390        $match = $this->getConf('matchRegex');
391        $matchRE = '';
392
393        if ($skip) {
394            $skipRE = '/' . $skip . '/';
395            if (@preg_match($skipRE, '') === false) {
396                $this->error(preg_last_error_msg());
397                $this->error('Invalid regular expression in $conf[\'skipRegex\']. Ignored.');
398                $skipRE = '';
399            } else {
400                $this->success('Skipping pages matching ' . $skipRE);
401            }
402        }
403
404        if ($match) {
405            $matchRE = '/' . $match . '/';
406            if (@preg_match($matchRE, '') === false) {
407                $this->error(preg_last_error_msg());
408                $this->error('Invalid regular expression in $conf[\'matchRegex\']. Ignored.');
409                $matchRE = '';
410            } else {
411                $this->success('Only indexing pages matching ' . $matchRE);
412            }
413        }
414        return [$skipRE, $matchRE];
415    }
416}
417