xref: /plugin/aichat/cli.php (revision 911314cdcf61977f3dcec01a4980522271a96e1c)
<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;
use splitbrain\phpcli\Colors;
use splitbrain\phpcli\Options;


/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /** @var resource|null lazily opened handle on standard input, shared by all readLine() calls */
    protected $stdin;

    /**
     * Load the aichat helper and register this CLI as the logger of its embeddings component
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                // no (or an unknown) command given
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the embeddings storage
     *
     * @return void
     */
    protected function showinfo()
    {
        echo 'model: ' . $this->getConf('model') . "\n";
        $stats = $this->helper->getEmbeddings()->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            // non-scalar values cannot be concatenated directly, print them JSON encoded instead
            if (is_array($value) || is_object($value)) {
                $value = json_encode($value);
            }
            echo $key . ': ' . $value . "\n";
        }

        // debugging aid, currently disabled:
        //echo $this->helper->getModel()->listUpstreamModels();
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until standard input is exhausted (readLine() returns
     * an empty string). Each interpreted question and its answer are appended to the
     * history that is passed along with the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // strict comparison: a question like "0" is falsy but still a valid question
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear value of the 'clear' option, handed on to createNewIndex()
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        ini_set('memory_limit', '-1'); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, the sources it is based on and the model usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * Each source is printed on its own line as page, chunk id and score.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * Blank lines are ignored and the prompt is shown again. When standard input is
     * exhausted (EOF, e.g. Ctrl-D or the end of piped input) or cannot be opened, an
     * empty string is returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, or an empty string when no more input is available
     */
    protected function readLine($prompt)
    {
        // reuse a single handle: reopening the stream for every read can drop input
        // that was already buffered by the previous handle
        if (!is_resource($this->stdin)) {
            $this->stdin = fopen('php://stdin', 'r');
            if ($this->stdin === false) {
                $this->stdin = null;
                return '';
            }
        }

        $value = '';
        while ($value === '') {
            echo $prompt;
            echo ': ';

            $line = fgets($this->stdin);
            if ($line === false) {
                // EOF or read error: there will never be more input
                return '';
            }
            $value = trim($line);
        }

        return $value;
    }
}
2458817535bSAndreas Gohr
246