xref: /plugin/aichat/cli.php (revision 01f06932bbd74c60ea6c93ab68b0d6cf32d05aea)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use dokuwiki\plugin\aichat\Chunk;
5use dokuwiki\Search\Indexer;
6use splitbrain\phpcli\Colors;
7use splitbrain\phpcli\Options;
8
9
10/**
11 * DokuWiki Plugin aichat (CLI Component)
12 *
13 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
14 * @author  Andreas Gohr <gohr@cosmocode.de>
15 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and route the embeddings' log output to this CLI
     *
     * @param bool $autocatch passed through to the parent CLI constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /**
     * Register the help text and all commands, options and arguments
     *
     * @inheritDoc
     */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c', false, 'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /**
     * Dispatch the selected command to its handler; print the help when no command matches
     *
     * @inheritDoc
     */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {

            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the vector storage
     *
     * @return void
     */
    protected function showinfo()
    {
        echo 'model: ' . $this->getConf('model') . "\n";
        $stats = $this->helper->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }

        //echo $this->helper->getModel()->listUpstreamModels();
    }

    /**
     * Check chunk availability for a given page
     *
     * The page is looked up in the search index first; its position there is
     * used to derive the second argument for the storage lookup.
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // clean once and use the same ID for the index lookup and the storage query
        $id = cleanID($page);

        $indexer = new Indexer();
        $pages = $indexer->getPages();
        // strict comparison: numeric-looking IDs must not match loosely (e.g. '10' == '1e1')
        $pos = array_search($id, $pages, true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // NOTE(review): $pos * 100 presumably is the first chunk ID reserved for this page - confirm
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * Each chunk is followed by a colored separator line; the total count is
     * reported at the end.
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. Each
     * interpreted question and its answer are appended to the history that is
     * passed along with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a plain truthiness check would end the session on the input "0"
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the chunks that are similar to the query and print them
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages in playground or sandbox namespaces are skipped via the regular
     * expression passed to the index builder.
     *
     * @param bool $clear value of the 'clear' option, passed through to the index builder
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        ini_set('memory_limit', '-1'); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Outputs the answer text, the sources it is based on and the usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * One line per chunk: page, chunk ID and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line was entered. When the input
     * stream ends (EOF, e.g. Ctrl-D or exhausted piped input) or cannot be
     * opened, an empty string is returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string when no more input is available
     */
    protected function readLine($prompt)
    {
        // Prefer the shared STDIN stream: reopening php://stdin for every read
        // may throw away input that was already buffered by a previous handle
        $ownHandle = !defined('STDIN');
        $fh = $ownHandle ? fopen('php://stdin', 'r') : STDIN;
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // EOF or read error: stop asking, the caller gets an empty string
                break;
            }
            $value = trim($line);
        }

        if ($ownHandle) {
            fclose($fh);
        }

        return $value;
    }
}
278
279