xref: /plugin/aichat/cli.php (revision 51aa8517a15244890eb0132c8019c9857c046a12)
18817535bSAndreas Gohr<?php
28817535bSAndreas Gohr
3f6ef2e50SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
4f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
501f06932SAndreas Gohruse dokuwiki\Search\Indexer;
6c4584168SAndreas Gohruse splitbrain\phpcli\Colors;
78817535bSAndreas Gohruse splitbrain\phpcli\Options;
83379af09SAndreas Gohruse splitbrain\phpcli\TableFormatter;
98817535bSAndreas Gohr
/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end of the plugin. It registers one sub command per
 * task (embedding, maintenance, querying, debugging) and delegates the actual
 * work to the aichat helper plugin.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the helper plugin and register this CLI as its logger
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('maintenance', 'Run storage maintenance. Refer to the documentation for details.');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('models', 'List available models');

        $options->registerCommand('info', 'Get Info about the vector storage and other stats');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');
        $options->registerOption('dump', 'Dump the chunks', 'd', false, 'page');

        $options->registerCommand('tsv', 'Create TSV files for visualizing at http://projector.tensorflow.org/' .
            ' Not supported on all storages.');
        $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv');
        $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        $this->loadConfig();
        // lift the memory limit for this run; ini values are strings
        ini_set('memory_limit', '-1');

        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'maintenance':
                $this->runMaintenance();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'models':
                $this->models();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0], $options->getOpt('dump'));
                break;
            case 'info':
                $this->showinfo();
                break;
            case 'tsv':
                // both file names are optional arguments
                $args = $options->getArgs();
                $vector = $args[0] ?? 'vector.tsv';
                $meta = $args[1] ?? 'meta.tsv';
                $this->tsv($vector, $meta);
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the configured models, the recorded run data and the storage statistics
     *
     * @return void
     */
    protected function showinfo()
    {
        $stats = [
            'chat model' => $this->getConf('chatmodel'),
            'embed model' => $this->getConf('embedmodel'),
        ];
        $stats = array_merge(
            $stats,
            array_map('dformat', $this->helper->getRunData()),
            $this->helper->getStorage()->statistics()
        );
        $this->printTable($stats);
    }

    /**
     * Print key value data as tabular data
     *
     * Array values are printed as a heading followed by their own entries,
     * indented one level deeper.
     *
     * @param array $data
     * @param int $level current nesting depth, used for indentation
     * @return void
     */
    protected function printTable($data, $level = 0)
    {
        $tf = new TableFormatter($this->colors);
        $columns = [$level * 2, 20, '*'];

        foreach ($data as $key => $value) {
            if (is_array($value)) {
                echo $tf->format(
                    $columns,
                    ['', $key, ''],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
                );
                $this->printTable($value, $level + 1);
            } else {
                echo $tf->format(
                    $columns,
                    ['', $key, $value],
                    [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTGRAY]
                );
            }
        }
    }

    /**
     * Check chunk availability for a given page
     *
     * @param string $page
     * @param bool $dump print the found chunks as JSON
     * @return void
     */
    protected function page($page, $dump = false)
    {
        // use the same normalized ID for the index lookup and the storage lookup
        $id = cleanID($page);

        $indexer = new Indexer();
        // strict comparison so numeric looking page IDs are not conflated
        $pos = array_search($id, $indexer->getPages(), true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        // the second argument is derived from the page's position in the index
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
            if ($dump) {
                echo json_encode($chunks, JSON_PRETTY_PRINT);
            }
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. Each question
     * is answered with the previous questions and answers as history.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $this->enableModelDebug();

        $history = [];
        // readLine() only returns an empty string when the input has ended;
        // comparing against '' keeps an input of "0" from ending the session
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getChatModel()->resetUsageStats();
            $this->helper->getRephraseModel()->resetUsageStats();
            $this->helper->getEmbedModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * List the chat and embedding models defined in the Model/<provider>/models.json files
     *
     * A model is printed in green when its model class could be instantiated
     * with the current configuration and in red when that threw an exception.
     *
     * @return void
     */
    protected function models()
    {
        $result = [
            'chat' => [],
            'embedding' => [],
        ];

        // glob() may return false on error
        $jsons = glob(__DIR__ . '/Model/*/models.json') ?: [];
        foreach ($jsons as $json) {
            $models = json_decode((string) file_get_contents($json), true);
            if (!is_array($models)) {
                $this->error('Could not read model definitions from ' . $json);
                continue;
            }

            // the directory name is the provider namespace
            $namespace = basename(dirname($json));
            foreach ($models as $type => $model) {
                if (!is_array($model)) {
                    continue;
                }

                $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $namespace . '\\' . ucfirst($type) . 'Model';
                foreach ($model as $name => $info) {
                    try {
                        new $class($name, $this->conf);
                        $info['confok'] = true;
                    } catch (Exception $e) {
                        $info['confok'] = false;
                    }

                    $result[$type]["$namespace $name"] = $info;
                }
            }
        }

        $td = new TableFormatter($this->colors);
        $cols = [30, 20, 20, '*'];
        echo "==== Chat Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['chat'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf(" In: %7d\nOut: %7d", $info['inputTokens'], $info['outputTokens']),
                    sprintf(" In: %.2f\nOut: %.2f", $info['inputTokenPrice'], $info['outputTokenPrice']),
                    $info['description'] . "\n"
                ],
                [
                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $cols = [30, 10, 10, 10, '*'];
        echo "==== Embedding Models ====\n\n";
        echo $td->format(
            $cols,
            ['Model', 'Token Limits', 'Price USD/M', 'Dimensions', 'Description'],
            [Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE, Colors::C_LIGHTBLUE]
        );
        foreach ($result['embedding'] as $name => $info) {
            echo $td->format(
                $cols,
                [
                    $name,
                    sprintf("%7d", $info['inputTokens']),
                    sprintf("%.2f", $info['inputTokenPrice']),
                    $info['dimensions'],
                    $info['description'] . "\n"
                ],
                [
                    $info['confok'] ? Colors::C_LIGHTGREEN : Colors::C_LIGHTRED,
                ]
            );
        }

        $this->colors->ptln('Current prices may differ', Colors::C_RED);
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $this->enableModelDebug();

        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $langlimit = $this->helper->getLanguageLimit();
        if ($langlimit) {
            $this->info('Limiting results to {lang}', ['lang' => $langlimit]);
        }

        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query, $langlimit);
        $this->printSources($sources);
    }

    /**
     * Run the maintenance tasks
     *
     * @return void
     */
    protected function runMaintenance()
    {
        $start = time();
        $this->helper->getStorage()->runMaintenance();
        $this->finishRun($start, 'maintenance ran at');
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear clear all existing embeddings before creating new ones
     * @return void
     */
    protected function createEmbeddings($clear)
    {
        [$skipRE, $matchRE] = $this->getRegexps();

        $start = time();
        $this->helper->getEmbeddings()->createNewIndex($skipRE, $matchRE, $clear);
        $this->finishRun($start, 'embed ran at');
    }

    /**
     * Dump TSV files for debugging
     *
     * @param string $vector name of the vector file to write
     * @param string $meta name of the meta file to write
     * @return void
     */
    protected function tsv($vector, $meta)
    {
        $storage = $this->helper->getStorage();
        $storage->dumpTSV($vector, $meta);
        $this->success('written to ' . $vector . ' and ' . $meta);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the combined usage statistics of the chat, rephrase and embedding models
     *
     * @return void
     */
    protected function printUsage()
    {
        $totals = ['requests' => 0, 'time' => 0, 'tokens' => 0, 'cost' => 0];

        $models = [
            $this->helper->getChatModel(),
            $this->helper->getRephraseModel(),
            $this->helper->getEmbedModel(),
        ];

        // sum every counter over all three models in one place, so no model
        // can be counted twice or forgotten for a single counter
        foreach ($models as $model) {
            $stats = $model->getUsageStats();
            foreach (array_keys($totals) as $key) {
                $totals[$key] += $stats[$key];
            }
        }

        $this->info(
            'Made {requests} requests in {time}s to models. Used {tokens} tokens for about ${cost}.',
            $totals
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Repeats the prompt until a non-empty line has been entered. An empty
     * string is only returned when the input stream has ended (EOF) or could
     * not be opened.
     *
     * @param string $prompt
     * @return string
     */
    protected function readLine($prompt)
    {
        // reuse one handle for all prompts so already buffered input is not lost
        static $stdin = null;
        if ($stdin === null) {
            $stdin = defined('STDIN') ? STDIN : fopen('php://stdin', 'r');
        }
        if (!is_resource($stdin)) {
            return '';
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($stdin);
            if ($line === false) {
                // EOF or read error: stop prompting instead of looping forever
                break;
            }
            $value = trim($line);
        }

        return $value;
    }

    /**
     * Read the skip and match regex from the config
     *
     * Ensures the regular expressions are valid
     *
     * @return string[] [$skipRE, $matchRE]
     */
    protected function getRegexps()
    {
        $skipRE = $this->loadRegex('skipRegex', 'Skipping pages matching ');
        $matchRE = $this->loadRegex('matchRegex', 'Only indexing pages matching ');

        return [$skipRE, $matchRE];
    }

    /**
     * Turn on debugging for all models when the debug log level is enabled
     *
     * @return void
     */
    private function enableModelDebug()
    {
        if (!$this->loglevel['debug']['enabled']) {
            return;
        }

        $this->helper->getChatModel()->setDebug(true);
        $this->helper->getRephraseModel()->setDebug(true);
        $this->helper->getEmbedModel()->setDebug(true);
    }

    /**
     * Report memory and time used by a run and store the time it finished
     *
     * @param int $start unix timestamp of when the run was started
     * @param string $runKey key under which the finish time is stored in the run data
     * @return void
     */
    private function finishRun($start, $runKey)
    {
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
        $this->notice('Spent time: {time}min', ['time' => round((time() - $start) / 60, 2)]);

        $data = $this->helper->getRunData();
        $data[$runKey] = time();
        $this->helper->setRunData($data);
    }

    /**
     * Build a delimited regular expression from a config setting and validate it
     *
     * @param string $key name of the config setting holding the undelimited pattern
     * @param string $notice message prefix printed in front of a valid expression
     * @return string the delimited expression, empty when not configured or invalid
     */
    private function loadRegex($key, $notice)
    {
        $pattern = $this->getConf($key);
        if (!$pattern) {
            return '';
        }

        $regex = '/' . $pattern . '/';
        // a pattern that fails to compile makes preg_match() return false and
        // emit a warning; the warning is suppressed on purpose and reported below
        if (@preg_match($regex, '') === false) {
            $this->error(preg_last_error_msg());
            $this->error('Invalid regular expression in $conf[\'' . $key . '\']. Ignored.');
            return '';
        }

        $this->success($notice . $regex);
        return $regex;
    }
}
517