xref: /plugin/aichat/cli.php (revision 7ebc78955c65af90e7ee0afbd07adc15271113ba)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\plugin\aichat\Chunk;
5use dokuwiki\Search\Indexer;
6use splitbrain\phpcli\Colors;
7use splitbrain\phpcli\Options;
8use splitbrain\phpcli\TableFormatter;
9
10/**
11 * DokuWiki Plugin aichat (CLI Component)
12 *
13 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
14 * @author  Andreas Gohr <gohr@cosmocode.de>
15 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat Helper giving access to the model, the embeddings and the storage */
    protected $helper;

    /**
     * Load the plugin helper and register this CLI as its logger
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('info', 'Get Info about the vector storage');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for the whole run
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments, fall back to the documented defaults
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model followed by the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed as a heading row followed by their own entries,
     * indented by two more columns per nesting level.
     *
     * @param array $data key => value pairs, values may be arrays themselves
     * @param int $level current nesting depth, used for the indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        $columns = [$level * 2, 15, '*'];

        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    $columns,
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    $columns,
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // normalize once and use the same ID for the index lookup and the storage query
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: loose comparison would treat numeric looking IDs like "007" and "7" as equal
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): presumably chunk IDs are allotted in blocks of 100 per indexed page,
        // making $pos * 100 the first chunk ID of this page - confirm against the storage implementation
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends. Each interpreted question and
     * its answer are appended to the history that is handed to the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a question like "0" is falsy but still valid input
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->printRuntimeStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the --clear option, handed on to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->printRuntimeStats($start);
    }

    /**
     * Report peak memory usage and the time spent since the given start
     *
     * @param int $start unix timestamp taken when the task was started
     * @return void
     */
    protected function printRuntimeStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector file name for the vector data
     * @param string $meta file name for the meta data
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, the sources it is based on and the usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics collected by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered. When the
     * input ends (EOF) or cannot be read, an empty string is returned instead.
     *
     * @param string $prompt
     * @return string the trimmed input, empty when no more input is available
     */
    protected function readLine($prompt)
    {
        // Prefer the already open STDIN stream of the CLI. Reopening php://stdin for every
        // read can drop input that was already buffered, e.g. when questions are piped in.
        $ownHandle = !defined('STDIN');
        $fh = $ownHandle ? fopen('php://stdin', 'r') : STDIN;
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // EOF or read error: give up instead of prompting forever
                break;
            }
            $value = trim($line);
        }

        if ($ownHandle) {
            fclose($fh);
        }

        return $value;
    }
}
354