xref: /plugin/aichat/cli.php (revision 8285fff93bda4602771c807cbe9218d1f3b88d32)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\plugin\aichat\Chunk;
5use dokuwiki\Search\Indexer;
6use splitbrain\phpcli\Colors;
7use splitbrain\phpcli\Options;
8use splitbrain\phpcli\TableFormatter;
9
10
/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. It dispatches the sub commands
 * registered in setup() to the aichat helper component (embeddings, storage and
 * chat model access) and prints the results to the terminal.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as its logger
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c', false, 'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for this CLI run
        ini_set('memory_limit', -1);
        switch ($options->getCmd()) {

            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                // no (known) command given: show the help screen
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Nested arrays are printed recursively, indented by two columns per level.
     *
     * @param array $data
     * @param int $level current nesting depth, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                // print the key as a heading line, then descend into the nested data
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * The given page name is cleaned once and that same ID is used for the
     * index lookup and for the storage query.
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: numeric looking IDs like "0123" and "123" must not match each other
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): the second argument is presumably the ID of the page's first chunk,
        // derived from the page's position in the index - confirm against the storage API
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends (e.g. Ctrl-D).
     * Each interpreted question and its answer are appended to the history
     * that is passed along with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // readLine() only returns an empty string when no more input is available
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->printRunStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages in playground and sandbox namespaces are skipped.
     *
     * @param bool $clear value of the "clear" option, handed on to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->printRunStats($start);
    }

    /**
     * Report peak memory usage and the time elapsed since the given start
     *
     * @param int $start Unix timestamp taken when the task was started
     * @return void
     */
    protected function printRunStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed as one line with its page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics of the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line was entered. When the input
     * stream ends before that (EOF), an empty string is returned instead of
     * prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string if no more input is available
     */
    protected function readLine($prompt)
    {
        // Prefer the shared STDIN stream: reopening php://stdin for every read would
        // throw away any input that was already buffered (e.g. when input is piped in)
        $ownsHandle = !defined('STDIN');
        $fh = $ownsHandle ? fopen('php://stdin', 'r') : STDIN;
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // EOF or read error: stop asking
                break;
            }
            $value = trim($line);
        }

        if ($ownsHandle) {
            fclose($fh);
        }

        return $value;
    }
}
325