xref: /plugin/aichat/cli.php (revision e1251882372557ff3ce8f12d253df8d66390690e)
18817535bSAndreas Gohr<?php
28817535bSAndreas Gohr
3f6ef2e50SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
40de7e020SAndreas Gohruse dokuwiki\plugin\aichat\AbstractCLI;
5f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
6c2b7a1f7SAndreas Gohruse dokuwiki\plugin\aichat\ModelFactory;
701f06932SAndreas Gohruse dokuwiki\Search\Indexer;
8c4584168SAndreas Gohruse splitbrain\phpcli\Colors;
98817535bSAndreas Gohruse splitbrain\phpcli\Options;
103379af09SAndreas Gohruse splitbrain\phpcli\TableFormatter;
118817535bSAndreas Gohr
128817535bSAndreas Gohr/**
138817535bSAndreas Gohr * DokuWiki Plugin aichat (CLI Component)
148817535bSAndreas Gohr *
158817535bSAndreas Gohr * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
168817535bSAndreas Gohr * @author  Andreas Gohr <gohr@cosmocode.de>
178817535bSAndreas Gohr */
180de7e020SAndreas Gohrclass cli_plugin_aichat extends AbstractCLI
198817535bSAndreas Gohr{
200337f47fSAndreas Gohr    /** @var helper_plugin_aichat */
210337f47fSAndreas Gohr    protected $helper;
220337f47fSAndreas Gohr
238817535bSAndreas Gohr    /** @inheritDoc */
248817535bSAndreas Gohr    protected function setup(Options $options)
258817535bSAndreas Gohr    {
260de7e020SAndreas Gohr        parent::setup($options);
27bddd899cSAndreas Gohr
285284515dSAndreas Gohr        $options->setHelp(
295284515dSAndreas Gohr            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
305284515dSAndreas Gohr            'This may incur costs.'
315284515dSAndreas Gohr        );
328817535bSAndreas Gohr
330de7e020SAndreas Gohr        $options->registerOption(
340de7e020SAndreas Gohr            'model',
350de7e020SAndreas Gohr            'Overrides the chat and rephrasing model settings and uses this model instead',
360de7e020SAndreas Gohr            '',
370de7e020SAndreas Gohr            'model'
380de7e020SAndreas Gohr        );
390de7e020SAndreas Gohr
405284515dSAndreas Gohr        $options->registerCommand(
415284515dSAndreas Gohr            'embed',
425284515dSAndreas Gohr            'Create embeddings for all pages. This skips pages that already have embeddings'
435284515dSAndreas Gohr        );
445284515dSAndreas Gohr        $options->registerOption(
455284515dSAndreas Gohr            'clear',
465284515dSAndreas Gohr            'Clear all existing embeddings before creating new ones',
477ebc7895Ssplitbrain            'c',
487ebc7895Ssplitbrain            false,
497ebc7895Ssplitbrain            'embed'
505284515dSAndreas Gohr        );
518817535bSAndreas Gohr
52e8451b21SAndreas Gohr        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');
533379af09SAndreas Gohr
548817535bSAndreas Gohr        $options->registerCommand('similar', 'Search for similar pages');
558817535bSAndreas Gohr        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');
568817535bSAndreas Gohr
578817535bSAndreas Gohr        $options->registerCommand('ask', 'Ask a question');
588817535bSAndreas Gohr        $options->registerArgument('question', 'The question to ask', true, 'ask');
59c4584168SAndreas Gohr
60c4584168SAndreas Gohr        $options->registerCommand('chat', 'Start an interactive chat session');
61ad38c5fdSAndreas Gohr
62e8451b21SAndreas Gohr        $options->registerCommand('models', 'List available models');
63e8451b21SAndreas Gohr
64e75dc39fSAndreas Gohr        $options->registerCommand('info', 'Get Info about the vector storage and other stats');
658c8b7ba6SAndreas Gohr
66ad38c5fdSAndreas Gohr        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
67ad38c5fdSAndreas Gohr        $options->registerArgument('page', 'The page to split', true, 'split');
685786be46SAndreas Gohr
6901f06932SAndreas Gohr        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
7001f06932SAndreas Gohr        $options->registerArgument('page', 'The page to check', true, 'page');
71dc355d57SAndreas Gohr        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');
7201f06932SAndreas Gohr
738c8b7ba6SAndreas Gohr        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
748c8b7ba6SAndreas Gohr            ' Not supported on all storages.');
758c8b7ba6SAndreas Gohr        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
768c8b7ba6SAndreas Gohr        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
778817535bSAndreas Gohr    }
788817535bSAndreas Gohr
798817535bSAndreas Gohr    /** @inheritDoc */
808817535bSAndreas Gohr    protected function main(Options $options)
818817535bSAndreas Gohr    {
820de7e020SAndreas Gohr        parent::main($options);
83*e1251882SAndreas Gohr        auth_setup(); // make sure ACLs are initialized
840de7e020SAndreas Gohr
850de7e020SAndreas Gohr        $model = $options->getOpt('model');
860de7e020SAndreas Gohr        if ($model) {
870de7e020SAndreas Gohr            $this->helper->updateConfig(
880de7e020SAndreas Gohr                ['chatmodel' => $model, 'rephasemodel' => $model]
890de7e020SAndreas Gohr            );
90c2b7a1f7SAndreas Gohr        }
91c2b7a1f7SAndreas Gohr
928817535bSAndreas Gohr        switch ($options->getCmd()) {
938817535bSAndreas Gohr            case 'embed':
945284515dSAndreas Gohr                $this->createEmbeddings($options->getOpt('clear'));
958817535bSAndreas Gohr                break;
963379af09SAndreas Gohr            case 'maintenance':
973379af09SAndreas Gohr                $this->runMaintenance();
983379af09SAndreas Gohr                break;
998817535bSAndreas Gohr            case 'similar':
1008817535bSAndreas Gohr                $this->similar($options->getArgs()[0]);
1018817535bSAndreas Gohr                break;
1027552f1aaSAndreas Gohr            case 'ask':
1037552f1aaSAndreas Gohr                $this->ask($options->getArgs()[0]);
1047552f1aaSAndreas Gohr                break;
105c4584168SAndreas Gohr            case 'chat':
106c4584168SAndreas Gohr                $this->chat();
107c4584168SAndreas Gohr                break;
108e8451b21SAndreas Gohr            case 'models':
109e8451b21SAndreas Gohr                $this->models();
110e8451b21SAndreas Gohr                break;
111ad38c5fdSAndreas Gohr            case 'split':
112ad38c5fdSAndreas Gohr                $this->split($options->getArgs()[0]);
113ad38c5fdSAndreas Gohr                break;
11401f06932SAndreas Gohr            case 'page':
115dc355d57SAndreas Gohr                $this->page($options->getArgs()[0], $options->getOpt('dump'));
11601f06932SAndreas Gohr                break;
1175786be46SAndreas Gohr            case 'info':
118f6ef2e50SAndreas Gohr                $this->showinfo();
1195786be46SAndreas Gohr                break;
1208c8b7ba6SAndreas Gohr            case 'tsv':
1218c8b7ba6SAndreas Gohr                $args = $options->getArgs();
1228c8b7ba6SAndreas Gohr                $vector = $args[0] ?? 'vector.tsv';
1238c8b7ba6SAndreas Gohr                $meta = $args[1] ?? 'meta.tsv';
1248c8b7ba6SAndreas Gohr                $this->tsv($vector, $meta);
1258c8b7ba6SAndreas Gohr                break;
1268817535bSAndreas Gohr            default:
1278817535bSAndreas Gohr                echo $options->help();
1288817535bSAndreas Gohr        }
1298817535bSAndreas Gohr    }
1308817535bSAndreas Gohr
131c4584168SAndreas Gohr    /**
1325786be46SAndreas Gohr     * @return void
1335786be46SAndreas Gohr     */
134f6ef2e50SAndreas Gohr    protected function showinfo()
1355786be46SAndreas Gohr    {
1363379af09SAndreas Gohr        $stats = [
137b446155bSAndreas Gohr            'embed model' => (string) $this->helper->getEmbeddingModel(),
138b446155bSAndreas Gohr            'rephrase model' => (string) $this->helper->getRephraseModel(),
139b446155bSAndreas Gohr            'chat model' => (string) $this->helper->getChatModel(),
1403379af09SAndreas Gohr        ];
141e75dc39fSAndreas Gohr        $stats = array_merge(
142e75dc39fSAndreas Gohr            $stats,
143bae450a9SAndreas Gohr            $this->helper->getRunData(),
144e75dc39fSAndreas Gohr            $this->helper->getStorage()->statistics()
145e75dc39fSAndreas Gohr        );
1463379af09SAndreas Gohr        $this->printTable($stats);
1477ee8b02dSAndreas Gohr    }
148911314cdSAndreas Gohr
1493379af09SAndreas Gohr    /**
1503379af09SAndreas Gohr     * Print key value data as tabular data
1513379af09SAndreas Gohr     *
1523379af09SAndreas Gohr     * @param array $data
1533379af09SAndreas Gohr     * @param int $level
1543379af09SAndreas Gohr     * @return void
1553379af09SAndreas Gohr     */
1563379af09SAndreas Gohr    protected function printTable($data, $level = 0)
1573379af09SAndreas Gohr    {
1583379af09SAndreas Gohr        $tf = new TableFormatter($this->colors);
1593379af09SAndreas Gohr        foreach ($data as $key => $value) {
1603379af09SAndreas Gohr            if (is_array($value)) {
1613379af09SAndreas Gohr                echo $tf->format(
162e75dc39fSAndreas Gohr                    [$level * 2, 20, '*'],
1633379af09SAndreas Gohr                    ['', $key, ''],
1643379af09SAndreas Gohr                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
1653379af09SAndreas Gohr                );
1663379af09SAndreas Gohr                $this->printTable($value, $level + 1);
1673379af09SAndreas Gohr            } else {
1683379af09SAndreas Gohr                echo $tf->format(
169e75dc39fSAndreas Gohr                    [$level * 2, 20, '*'],
1703379af09SAndreas Gohr                    ['', $key, $value],
1713379af09SAndreas Gohr                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
1723379af09SAndreas Gohr                );
1733379af09SAndreas Gohr            }
1743379af09SAndreas Gohr        }
1755786be46SAndreas Gohr    }
1765786be46SAndreas Gohr
1775786be46SAndreas Gohr    /**
17801f06932SAndreas Gohr     * Check chunk availability for a given page
17901f06932SAndreas Gohr     *
18001f06932SAndreas Gohr     * @param string $page
18101f06932SAndreas Gohr     * @return void
18201f06932SAndreas Gohr     */
183dc355d57SAndreas Gohr    protected function page($page, $dump = false)
18401f06932SAndreas Gohr    {
18501f06932SAndreas Gohr        $indexer = new Indexer();
18601f06932SAndreas Gohr        $pages = $indexer->getPages();
18701f06932SAndreas Gohr        $pos = array_search(cleanID($page), $pages);
18801f06932SAndreas Gohr
18901f06932SAndreas Gohr        if ($pos === false) {
19001f06932SAndreas Gohr            $this->error('Page not found');
19101f06932SAndreas Gohr            return;
19201f06932SAndreas Gohr        }
19301f06932SAndreas Gohr
19401f06932SAndreas Gohr        $storage = $this->helper->getStorage();
19501f06932SAndreas Gohr        $chunks = $storage->getPageChunks($page, $pos * 100);
19601f06932SAndreas Gohr        if ($chunks) {
19701f06932SAndreas Gohr            $this->success('Found ' . count($chunks) . ' chunks');
198dc355d57SAndreas Gohr            if ($dump) {
199dc355d57SAndreas Gohr                echo json_encode($chunks, JSON_PRETTY_PRINT);
200dc355d57SAndreas Gohr            }
20101f06932SAndreas Gohr        } else {
20201f06932SAndreas Gohr            $this->error('No chunks found');
20301f06932SAndreas Gohr        }
20401f06932SAndreas Gohr    }
20501f06932SAndreas Gohr
20601f06932SAndreas Gohr    /**
207ad38c5fdSAndreas Gohr     * Split the given page into chunks and print them
208ad38c5fdSAndreas Gohr     *
209ad38c5fdSAndreas Gohr     * @param string $page
210ad38c5fdSAndreas Gohr     * @return void
211ad38c5fdSAndreas Gohr     * @throws Exception
212ad38c5fdSAndreas Gohr     */
213ad38c5fdSAndreas Gohr    protected function split($page)
214ad38c5fdSAndreas Gohr    {
215ab1f8ddeSAndreas Gohr        $chunks = $this->helper->getEmbeddings()->createPageChunks($page, 0);
216ad38c5fdSAndreas Gohr        foreach ($chunks as $chunk) {
217ab1f8ddeSAndreas Gohr            echo $chunk->getText();
218ad38c5fdSAndreas Gohr            echo "\n";
219ad38c5fdSAndreas Gohr            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
220ad38c5fdSAndreas Gohr        }
221ad38c5fdSAndreas Gohr        $this->success('Split into ' . count($chunks) . ' chunks');
222ad38c5fdSAndreas Gohr    }
223ad38c5fdSAndreas Gohr
224ad38c5fdSAndreas Gohr    /**
225c4584168SAndreas Gohr     * Interactive Chat Session
226c4584168SAndreas Gohr     *
227c4584168SAndreas Gohr     * @return void
228c4584168SAndreas Gohr     * @throws Exception
229c4584168SAndreas Gohr     */
230c4584168SAndreas Gohr    protected function chat()
231c4584168SAndreas Gohr    {
232c4584168SAndreas Gohr        $history = [];
233c4584168SAndreas Gohr        while ($q = $this->readLine('Your Question')) {
2346a18e0f4SAndreas Gohr            $this->helper->getChatModel()->resetUsageStats();
23551aa8517SAndreas Gohr            $this->helper->getRephraseModel()->resetUsageStats();
236c2b7a1f7SAndreas Gohr            $this->helper->getEmbeddingModel()->resetUsageStats();
237f6ef2e50SAndreas Gohr            $result = $this->helper->askChatQuestion($q, $history);
238f6ef2e50SAndreas Gohr            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
239f6ef2e50SAndreas Gohr            $history[] = [$result['question'], $result['answer']];
240c4584168SAndreas Gohr            $this->printAnswer($result);
241c4584168SAndreas Gohr        }
242c4584168SAndreas Gohr    }
243c4584168SAndreas Gohr
244c2b7a1f7SAndreas Gohr    /**
245c2b7a1f7SAndreas Gohr     * Print information about the available models
246c2b7a1f7SAndreas Gohr     *
247c2b7a1f7SAndreas Gohr     * @return void
248c2b7a1f7SAndreas Gohr     */
249e8451b21SAndreas Gohr    protected function models()
250e8451b21SAndreas Gohr    {
251c2b7a1f7SAndreas Gohr        $result = (new ModelFactory($this->conf))->getModels();
252e8451b21SAndreas Gohr
253e8451b21SAndreas Gohr        $td = new TableFormatter($this->colors);
254e8451b21SAndreas Gohr        $cols = [30, 20, 20, '*'];
255e8451b21SAndreas Gohr        echo "==== Chat Models ====\n\n";
256e8451b21SAndreas Gohr        echo $td->format(
257e8451b21SAndreas Gohr            $cols,
258e8451b21SAndreas Gohr            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
259e8451b21SAndreas Gohr            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
260e8451b21SAndreas Gohr        );
261e8451b21SAndreas Gohr        foreach ($result['chat'] as $name => $info) {
262e8451b21SAndreas Gohr            echo $td->format(
263e8451b21SAndreas Gohr                $cols,
264e8451b21SAndreas Gohr                [
265e8451b21SAndreas Gohr                    $name,
266e8451b21SAndreas Gohr                    sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
2672045e15aSAndreas Gohr                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
268e8451b21SAndreas Gohr                    $info['description'] . "\n"
269e8451b21SAndreas Gohr                ],
270e8451b21SAndreas Gohr                [
271c2b7a1f7SAndreas Gohr                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
272e8451b21SAndreas Gohr                ]
273e8451b21SAndreas Gohr            );
274e8451b21SAndreas Gohr        }
275e8451b21SAndreas Gohr
27687e46484SAndreas Gohr        $cols = [30, 10, 10, 10, '*'];
277e8451b21SAndreas Gohr        echo "==== Embedding Models ====\n\n";
278e8451b21SAndreas Gohr        echo $td->format(
279e8451b21SAndreas Gohr            $cols,
28087e46484SAndreas Gohr            ['Model', 'Token Limits', 'Price USD/M', 'Dimensions', 'Description'],
28187e46484SAndreas Gohr            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
282e8451b21SAndreas Gohr        );
283e8451b21SAndreas Gohr        foreach ($result['embedding'] as $name => $info) {
284e8451b21SAndreas Gohr            echo $td->format(
285e8451b21SAndreas Gohr                $cols,
286e8451b21SAndreas Gohr                [
287e8451b21SAndreas Gohr                    $name,
288e8451b21SAndreas Gohr                    sprintf("%7d", $info['inputTokens']),
289e8451b21SAndreas Gohr                    sprintf("%.2f", $info['inputTokenPrice']),
29087e46484SAndreas Gohr                    $info['dimensions'],
291e8451b21SAndreas Gohr                    $info['description'] . "\n"
292e8451b21SAndreas Gohr                ],
293e8451b21SAndreas Gohr                [
294c2b7a1f7SAndreas Gohr                    $info['instance'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
295e8451b21SAndreas Gohr                ]
296e8451b21SAndreas Gohr            );
297e8451b21SAndreas Gohr        }
298e8451b21SAndreas Gohr
299e8451b21SAndreas Gohr        $this->colors->ptln('Current prices may differ', Colors::C_RED);
300e8451b21SAndreas Gohr    }
301e8451b21SAndreas Gohr
302c4584168SAndreas Gohr    /**
303c4584168SAndreas Gohr     * Handle a single, standalone question
304c4584168SAndreas Gohr     *
305c4584168SAndreas Gohr     * @param string $query
306c4584168SAndreas Gohr     * @return void
307c4584168SAndreas Gohr     * @throws Exception
308c4584168SAndreas Gohr     */
309c4584168SAndreas Gohr    protected function ask($query)
310c4584168SAndreas Gohr    {
3110337f47fSAndreas Gohr        $result = $this->helper->askQuestion($query);
312c4584168SAndreas Gohr        $this->printAnswer($result);
3137552f1aaSAndreas Gohr    }
3147552f1aaSAndreas Gohr
315c4584168SAndreas Gohr    /**
316c4584168SAndreas Gohr     * Get the pages that are similar to the query
317c4584168SAndreas Gohr     *
318c4584168SAndreas Gohr     * @param string $query
319c4584168SAndreas Gohr     * @return void
320c4584168SAndreas Gohr     */
3218817535bSAndreas Gohr    protected function similar($query)
3228817535bSAndreas Gohr    {
323e33a1d7aSAndreas Gohr        $langlimit = $this->helper->getLanguageLimit();
324e33a1d7aSAndreas Gohr        if ($langlimit) {
325e33a1d7aSAndreas Gohr            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
326e33a1d7aSAndreas Gohr        }
327e33a1d7aSAndreas Gohr
328e33a1d7aSAndreas Gohr        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
329f6ef2e50SAndreas Gohr        $this->printSources($sources);
3308817535bSAndreas Gohr    }
3318817535bSAndreas Gohr
332c4584168SAndreas Gohr    /**
3333379af09SAndreas Gohr     * Run the maintenance tasks
3343379af09SAndreas Gohr     *
3353379af09SAndreas Gohr     * @return void
3363379af09SAndreas Gohr     */
3373379af09SAndreas Gohr    protected function runMaintenance()
3383379af09SAndreas Gohr    {
3393379af09SAndreas Gohr        $start = time();
3403379af09SAndreas Gohr        $this->helper->getStorage()->runMaintenance();
3413379af09SAndreas Gohr        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
3423379af09SAndreas Gohr        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
343e75dc39fSAndreas Gohr
344e75dc39fSAndreas Gohr        $data = $this->helper->getRunData();
345bae450a9SAndreas Gohr        $data['maintenance ran at'] = dformat();
346e75dc39fSAndreas Gohr        $this->helper->setRunData($data);
3473379af09SAndreas Gohr    }
3483379af09SAndreas Gohr
3493379af09SAndreas Gohr    /**
350c4584168SAndreas Gohr     * Recreate chunks and embeddings for all pages
351c4584168SAndreas Gohr     *
352c4584168SAndreas Gohr     * @return void
353c4584168SAndreas Gohr     */
3545284515dSAndreas Gohr    protected function createEmbeddings($clear)
3558817535bSAndreas Gohr    {
356d5c102b3SAndreas Gohr        [$skipRE, $matchRE] = $this->getRegexps();
357d5c102b3SAndreas Gohr
358bae450a9SAndreas Gohr        $data = $this->helper->getRunData();
359bae450a9SAndreas Gohr        $lastEmbedModel = $data['embed used'] ?? '';
360bae450a9SAndreas Gohr
361bae450a9SAndreas Gohr        if(
362bae450a9SAndreas Gohr            !$clear && $lastEmbedModel &&
363bae450a9SAndreas Gohr            $lastEmbedModel != (string) $this->helper->getEmbeddingModel()
364bae450a9SAndreas Gohr        ){
365bae450a9SAndreas Gohr            $this->warning('Embedding model has changed since last run. Forcing an index rebuild');
366bae450a9SAndreas Gohr            $clear = true;
367bae450a9SAndreas Gohr        }
368bae450a9SAndreas Gohr
3693379af09SAndreas Gohr        $start = time();
370d5c102b3SAndreas Gohr        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
371ad38c5fdSAndreas Gohr        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
3723379af09SAndreas Gohr        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);
373e75dc39fSAndreas Gohr
374bae450a9SAndreas Gohr
375bae450a9SAndreas Gohr        $data['embed ran at'] = dformat();
376bae450a9SAndreas Gohr        $data['embed used'] = (string) $this->helper->getEmbeddingModel();
377e75dc39fSAndreas Gohr        $this->helper->setRunData($data);
3788817535bSAndreas Gohr    }
3798817535bSAndreas Gohr
380c4584168SAndreas Gohr    /**
3818c8b7ba6SAndreas Gohr     * Dump TSV files for debugging
3828c8b7ba6SAndreas Gohr     *
3838c8b7ba6SAndreas Gohr     * @return void
3848c8b7ba6SAndreas Gohr     */
3858c8b7ba6SAndreas Gohr    protected function tsv($vector, $meta)
3868c8b7ba6SAndreas Gohr    {
3878c8b7ba6SAndreas Gohr
3888c8b7ba6SAndreas Gohr        $storage = $this->helper->getStorage();
3898c8b7ba6SAndreas Gohr        $storage->dumpTSV($vector, $meta);
3908c8b7ba6SAndreas Gohr        $this->success('written to ' . $vector . ' and ' . $meta);
3918c8b7ba6SAndreas Gohr    }
3928c8b7ba6SAndreas Gohr
3938c8b7ba6SAndreas Gohr    /**
39455392016SAndreas Gohr     * Print the given detailed answer in a nice way
39555392016SAndreas Gohr     *
39655392016SAndreas Gohr     * @param array $answer
39755392016SAndreas Gohr     * @return void
39855392016SAndreas Gohr     */
39955392016SAndreas Gohr    protected function printAnswer($answer)
40055392016SAndreas Gohr    {
40155392016SAndreas Gohr        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
40255392016SAndreas Gohr        echo "\n";
403f6ef2e50SAndreas Gohr        $this->printSources($answer['sources']);
40455392016SAndreas Gohr        echo "\n";
40555392016SAndreas Gohr        $this->printUsage();
40655392016SAndreas Gohr    }
40755392016SAndreas Gohr
40855392016SAndreas Gohr    /**
409f6ef2e50SAndreas Gohr     * Print the given sources
410f6ef2e50SAndreas Gohr     *
411f6ef2e50SAndreas Gohr     * @param Chunk[] $sources
412f6ef2e50SAndreas Gohr     * @return void
413f6ef2e50SAndreas Gohr     */
414f6ef2e50SAndreas Gohr    protected function printSources($sources)
415f6ef2e50SAndreas Gohr    {
416f6ef2e50SAndreas Gohr        foreach ($sources as $source) {
417f6ef2e50SAndreas Gohr            /** @var Chunk $source */
4189b3d1b36SAndreas Gohr            $this->colors->ptln(
4199b3d1b36SAndreas Gohr                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
4209b3d1b36SAndreas Gohr                Colors::C_LIGHTBLUE
4219b3d1b36SAndreas Gohr            );
422f6ef2e50SAndreas Gohr        }
423f6ef2e50SAndreas Gohr    }
424f6ef2e50SAndreas Gohr
425f6ef2e50SAndreas Gohr    /**
42655392016SAndreas Gohr     * Print the usage statistics for OpenAI
42755392016SAndreas Gohr     *
42855392016SAndreas Gohr     * @return void
42955392016SAndreas Gohr     */
430f6ef2e50SAndreas Gohr    protected function printUsage()
431f6ef2e50SAndreas Gohr    {
43251aa8517SAndreas Gohr        $chat = $this->helper->getChatModel()->getUsageStats();
43351aa8517SAndreas Gohr        $rephrase = $this->helper->getRephraseModel()->getUsageStats();
434c2b7a1f7SAndreas Gohr        $embed = $this->helper->getEmbeddingModel()->getUsageStats();
43551aa8517SAndreas Gohr
43655392016SAndreas Gohr        $this->info(
43751aa8517SAndreas Gohr            'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',
43851aa8517SAndreas Gohr            [
43951aa8517SAndreas Gohr                'requests' => $chat['requests'] + $rephrase['requests'] + $embed['requests'],
44051aa8517SAndreas Gohr                'time' => $chat['time'] + $rephrase['time'] + $embed['time'],
44151aa8517SAndreas Gohr                'tokens' => $chat['tokens'] + $chat['tokens'] + $embed['tokens'],
44251aa8517SAndreas Gohr                'cost' => $chat['cost'] + $chat['cost'] + $embed['cost'],
44351aa8517SAndreas Gohr            ]
44455392016SAndreas Gohr        );
44555392016SAndreas Gohr    }
44655392016SAndreas Gohr
44755392016SAndreas Gohr    /**
448c4584168SAndreas Gohr     * Interactively ask for a value from the user
449c4584168SAndreas Gohr     *
450c4584168SAndreas Gohr     * @param string $prompt
451c4584168SAndreas Gohr     * @return string
452c4584168SAndreas Gohr     */
453c4584168SAndreas Gohr    protected function readLine($prompt)
454c4584168SAndreas Gohr    {
455c4584168SAndreas Gohr        $value = '';
4568817535bSAndreas Gohr
457c4584168SAndreas Gohr        while ($value === '') {
458c4584168SAndreas Gohr            echo $prompt;
459c4584168SAndreas Gohr            echo ': ';
460c4584168SAndreas Gohr
461c4584168SAndreas Gohr            $fh = fopen('php://stdin', 'r');
462c4584168SAndreas Gohr            $value = trim(fgets($fh));
463c4584168SAndreas Gohr            fclose($fh);
464c4584168SAndreas Gohr        }
465c4584168SAndreas Gohr
466c4584168SAndreas Gohr        return $value;
467c4584168SAndreas Gohr    }
468d5c102b3SAndreas Gohr
469d5c102b3SAndreas Gohr    /**
470d5c102b3SAndreas Gohr     * Read the skip and match regex from the config
471d5c102b3SAndreas Gohr     *
472d5c102b3SAndreas Gohr     * Ensures the regular expressions are valid
473d5c102b3SAndreas Gohr     *
474d5c102b3SAndreas Gohr     * @return string[] [$skipRE, $matchRE]
475d5c102b3SAndreas Gohr     */
476d5c102b3SAndreas Gohr    protected function getRegexps()
477d5c102b3SAndreas Gohr    {
478d5c102b3SAndreas Gohr        $skip = $this->getConf('skipRegex');
479d5c102b3SAndreas Gohr        $skipRE = '';
480d5c102b3SAndreas Gohr        $match = $this->getConf('matchRegex');
481d5c102b3SAndreas Gohr        $matchRE = '';
482d5c102b3SAndreas Gohr
483d5c102b3SAndreas Gohr        if ($skip) {
484d5c102b3SAndreas Gohr            $skipRE = '/' . $skip . '/';
48549a7d3ccSsplitbrain            if (@preg_match($skipRE, '') === false) {
486d5c102b3SAndreas Gohr                $this->error(preg_last_error_msg());
487d5c102b3SAndreas Gohr                $this->error('Invalid regular expression in $conf[\'skipRegex\']. Ignored.');
488d5c102b3SAndreas Gohr                $skipRE = '';
489d5c102b3SAndreas Gohr            } else {
490d5c102b3SAndreas Gohr                $this->success('Skipping pages matching ' . $skipRE);
491d5c102b3SAndreas Gohr            }
492d5c102b3SAndreas Gohr        }
493d5c102b3SAndreas Gohr
494d5c102b3SAndreas Gohr        if ($match) {
495d5c102b3SAndreas Gohr            $matchRE = '/' . $match . '/';
49649a7d3ccSsplitbrain            if (@preg_match($matchRE, '') === false) {
497d5c102b3SAndreas Gohr                $this->error(preg_last_error_msg());
498d5c102b3SAndreas Gohr                $this->error('Invalid regular expression in $conf[\'matchRegex\']. Ignored.');
499d5c102b3SAndreas Gohr                $matchRE = '';
500d5c102b3SAndreas Gohr            } else {
501d5c102b3SAndreas Gohr                $this->success('Only indexing pages matching ' . $matchRE);
502d5c102b3SAndreas Gohr            }
503d5c102b3SAndreas Gohr        }
504d5c102b3SAndreas Gohr        return [$skipRE, $matchRE];
505d5c102b3SAndreas Gohr    }
5068817535bSAndreas Gohr}
507