xref: /plugin/aichat/cli.php (revision 3379af09b7ec10f96a8d4f23b1563bd7f9ae79ac)
18817535bSAndreas Gohr<?php
28817535bSAndreas Gohr
3f6ef2e50SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
4f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
501f06932SAndreas Gohruse dokuwiki\Search\Indexer;
6c4584168SAndreas Gohruse splitbrain\phpcli\Colors;
78817535bSAndreas Gohruse splitbrain\phpcli\Options;
8*3379af09SAndreas Gohruse splitbrain\phpcli\TableFormatter;
98817535bSAndreas Gohr
108817535bSAndreas Gohr
/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. It registers the sub commands
 * `embed`, `maintenance`, `similar`, `ask`, `chat`, `split`, `page` and `info`
 * and delegates the actual work to the aichat helper plugin.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as its logger
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c', false, 'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        // lift the memory limit for this CLI run
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'model' => $this->getConf('model'),
        ];
        $stats = array_merge($stats, $this->helper->getStorage()->statistics());
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Array values are printed as a heading row followed by their own entries,
     * indented one level deeper.
     *
     * @param array $data
     * @param int $level nesting depth, each level indents by two columns
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    [$level * 2, 15, '*'],
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // normalize once so that index lookup and storage query use the same ID
        $id = cleanID($page);

        $indexer = new Indexer();
        $pos = array_search($id, $indexer->getPages(), true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        // the second argument is derived from the page's position in the index
        $chunks = $this->helper->getStorage()->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a question like "0" is falsy but still valid input
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->printRunStats($start);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the `clear` option, passed on to createNewIndex()
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        $start = time();
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->printRunStats($start);
    }

    /**
     * Report peak memory usage and the time spent since the given start
     *
     * @param int $start unix timestamp taken when the task was started
     * @return void
     */
    protected function printRunStats($start)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys `answer` and `sources`
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Blank lines are ignored and the prompt is shown again.
     *
     * @param string $prompt
     * @return string the trimmed input, or an empty string when the input stream has ended
     */
    protected function readLine($prompt)
    {
        do {
            echo $prompt;
            echo ': ';

            // Read from the process wide STDIN handle. Reopening php://stdin for each
            // prompt may drop piped input that was already buffered by the closed handle.
            $line = fgets(STDIN);
            if ($line === false) {
                // EOF (e.g. Ctrl-D) or read error: stop instead of prompting forever
                return '';
            }

            $value = trim($line);
        } while ($value === '');

        return $value;
    }
}
3268817535bSAndreas Gohr
327