xref: /plugin/aichat/cli.php (revision 5284515d807f1a81ab1cc7dbdd445bd2cfbb2a16)
1<?php
2
3use dokuwiki\plugin\aichat\backend\Chunk;
4use splitbrain\phpcli\Colors;
5use splitbrain\phpcli\Options;
6
7
8/**
9 * DokuWiki Plugin aichat (CLI Component)
10 *
11 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
12 * @author  Andreas Gohr <gohr@cosmocode.de>
13 */
class cli_plugin_aichat extends \dokuwiki\Extension\CLIPlugin
{
    /** @var helper_plugin_aichat The aichat helper plugin all commands delegate to */
    protected $helper;

    /**
     * Load the helper plugin and register this CLI as logger for the embeddings component
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }


    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c', false, 'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {

            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'info':
                $this->treeinfo();
                break;
            default:
                // no (known) command given: show the usage help
                echo $options->help();
        }
    }

    /**
     * Print the statistics reported by the embeddings storage, one "key: value" pair per line
     *
     * @return void
     */
    protected function treeinfo()
    {
        $stats = $this->helper->getEmbeddings()->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * Each chunk is followed by a colored separator line, a summary with the
     * number of chunks is printed at the end.
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends (e.g. Ctrl-D).
     * From the second question on, the question is rephrased using the chat
     * history before it is sent off.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a truthiness check would abort the session on the input "0"
        while (($q = $this->readLine('Your Question')) !== '') {
            if ($history) {
                $question = $this->helper->rephraseChatQuestion($q, $history);
                $this->colors->ptln("Interpretation: $question", Colors::C_LIGHTPURPLE);
            } else {
                $question = $q;
            }
            $result = $this->helper->askQuestion($question);
            // the history records the question as typed, not the rephrased one
            $history[] = [$q, $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text followed by the page of each source chunk.
     *
     * @param array $answer needs the keys 'answer' (string) and 'sources' (list of Chunk objects)
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        foreach ($answer['sources'] as $source) {
            /** @var Chunk $source */
            $this->colors->ptln("\t" . $source->getPage(), Colors::C_LIGHTBLUE);
        }
        echo "\n";
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * Prints the page of every chunk returned by the similarity lookup.
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        foreach ($sources as $source) {
            $this->colors->ptln($source->getPage(), Colors::C_LIGHTBLUE);
        }
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages in a playground or sandbox namespace are skipped. The peak memory
     * usage is reported when done.
     *
     * @param bool $clear value of the 'clear' option, passed on to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        ini_set('memory_limit', -1); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered. When the
     * input stream ends or can not be read, an empty string is returned instead
     * of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string on end of input
     */
    protected function readLine($prompt)
    {
        // Prefer the process-wide STDIN stream: opening a fresh php://stdin handle for each
        // read discards whatever that handle had already buffered once it is closed again
        $ownsHandle = !defined('STDIN');
        $fh = $ownsHandle ? fopen('php://stdin', 'r') : STDIN;
        if ($fh === false) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($fh);
            if ($line === false) {
                // EOF or read error: there will never be more input, so stop asking
                break;
            }
            $value = trim($line);
        }

        if ($ownsHandle) {
            fclose($fh);
        }

        return $value;
    }
}
220
221