xref: /plugin/aichat/cli.php (revision e33a1d7adcbf36c57f516e2f829ec8ad59cdb47b)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\plugin\aichat\Chunk;
5use dokuwiki\Search\Indexer;
6use splitbrain\phpcli\Colors;
7use splitbrain\phpcli\Options;
8use splitbrain\phpcli\TableFormatter;
9
10
11/**
12 * DokuWiki Plugin aichat (CLI Component)
13 *
14 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
15 * @author  Andreas Gohr <gohr@cosmocode.de>
16 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI instance as its logger
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        // embed [-c|--clear]
        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        // maintenance
        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        // similar <query>
        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        // ask <question>
        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        // chat
        $options->registerCommand('chat', 'Start an interactive chat session');

        // info
        $options->registerCommand('info', 'Get Info about the vector storage');

        // split <page>
        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        // page <page>
        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        // tsv [vector.tsv] [meta.tsv]
        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // -1 disables the PHP memory limit for this process
        ini_set('memory_limit', '-1');

        $args = $options->getArgs();

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($args[0]);
                break;
            case 'ask':
                $this->ask($args[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($args[0]);
                break;
            case 'page':
                $this->page($args[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional and fall back to defaults in the current directory
                $this->tsv($args[0] ?? 'vector.tsv', $args[1] ?? 'meta.tsv');
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model together with the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = array_merge(
            ['model' => $this->getConf('model')],
            $this->helper->getStorage()->statistics()
        );
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively, each level indented by two more columns.
     *
     * @param array $data
     * @param int $level current nesting depth, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        $columns = [$level * 2, 15, '*'];

        foreach ($data as $key => $value) {
            if (is_array($value)) {
                // heading row for the nested section, followed by its indented content
                echo $tf->format(
                    $columns,
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
                continue;
            }

            echo $tf->format(
                $columns,
                ['', $key, $value],
                [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
            );
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        $id = cleanID($page);

        $indexer = new Indexer();
        // strict comparison: numeric looking IDs like "0123" and "123" must not match each other
        $pos = array_search($id, $indexer->getPages(), true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        // the index lookup matched the cleaned ID, so query the storage with that same ID
        $chunks = $this->helper->getStorage()->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks(rawWiki($page));

        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }

        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. Previous questions and answers
     * are passed along as history with each new question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];

        // compare against '' explicitly: a truthiness check would end the session on the input "0"
        while (($question = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($question, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $this->printAnswer($this->helper->askQuestion($query));
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $this->printSources(
            $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit)
        );
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->printRuntimeStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the 'clear' option, handed to the embeddings index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->printRuntimeStats($start);
    }

    /**
     * Report peak memory usage and the time elapsed since the given start
     *
     * @param int $start unix timestamp taken when the task started
     * @return void
     */
    protected function printRuntimeStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector path of the vector file to write
     * @param string $meta path of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $this->helper->getStorage()->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Outputs the answer text, the sources it is based on and the usage statistics.
     *
     * @param array $answer expects the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $line = "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')';
            $this->colors->ptln($line, Colors::C_LIGHTBLUE);
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Empty lines are ignored and the prompt is repeated until something is entered.
     *
     * @param string $prompt
     * @return string the trimmed input, or an empty string when the input stream has ended
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        try {
            do {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // EOF or read error: stop prompting instead of looping forever
                    return '';
                }
                $value = trim($line);
            } while ($value === '');

            return $value;
        } finally {
            fclose($fh);
        }
    }
}
355