xref: /plugin/aichat/cli.php (revision 3379af09b7ec10f96a8d4f23b1563bd7f9ae79ac)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\plugin\aichat\Chunk;
5use dokuwiki\Search\Indexer;
6use splitbrain\phpcli\Colors;
7use splitbrain\phpcli\Options;
8use splitbrain\phpcli\TableFormatter;
9
10
11/**
12 * DokuWiki Plugin aichat (CLI Component)
13 *
14 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
15 * @author  Andreas Gohr <gohr@cosmocode.de>
16 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat helper giving access to the model, embeddings and storage */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI object as its logger
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c', false, 'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for this CLI run
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                // no (known) command given: show the help screen
                echo $options->help();
        }
    }

    /**
     * Print the configured model name together with the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Array values are printed as a heading row followed by their own entries,
     * indented by two additional columns per nesting level.
     *
     * @param array $data the key => value pairs to print, values may be nested arrays
     * @param int $level current nesting depth, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        // column layout: indentation, key, value (takes the remaining width)
        $columns = [$level * 2, 15, '*'];

        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    $columns,
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    $columns,
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * The page is looked up in the search index first; its position there is
     * used to derive the ID of the page's first chunk.
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // use the same cleaned ID for the index lookup and the storage query
        $id = cleanID($page);

        $indexer = new Indexer();
        // strict comparison: numeric looking IDs like '1e3' and '1000' must not match each other
        $pos = array_search($id, $indexer->getPages(), true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): first chunk ID = index position * 100, presumably the same
        // scheme used when the embeddings are created - confirm against the indexing code
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions and passes the previous question/answer pairs
     * along as history. The session ends when standard input is closed (EOF,
     * e.g. Ctrl+D).
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a question like "0" is falsy but still valid input
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->printRuntimeStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages in playground or sandbox namespaces are skipped.
     *
     * @param bool $clear Clear all existing embeddings before creating new ones
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->printRuntimeStats($start);
    }

    /**
     * Report peak memory usage and the time spent since the given start time
     *
     * @param int $start unix timestamp taken before the long running task was started
     * @return void
     */
    protected function printRuntimeStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, the sources it is based on and the usage statistics.
     *
     * @param array $answer needs to contain the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Empty input is rejected and the prompt is shown again. When standard
     * input is closed (EOF) no more input can arrive, so an empty string is
     * returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed, non-empty input or an empty string on EOF
     */
    protected function readLine($prompt)
    {
        $value = '';

        while ($value === '') {
            echo $prompt;
            echo ': ';

            // read from the already open STDIN stream; reopening php://stdin for every
            // line may drop input that was buffered by the previous handle
            $line = fgets(STDIN);
            if ($line === false) {
                // EOF or read error: finish the prompt line and signal "no input"
                echo "\n";
                return '';
            }
            $value = trim($line);
        }

        return $value;
    }
}
326
327