xref: /plugin/aichat/cli.php (revision 01f06932bbd74c60ea6c93ab68b0d6cf32d05aea)
18817535bSAndreas Gohr<?php
28817535bSAndreas Gohr
3f6ef2e50SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
4f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
5*01f06932SAndreas Gohruse dokuwiki\Search\Indexer;
6c4584168SAndreas Gohruse splitbrain\phpcli\Colors;
78817535bSAndreas Gohruse splitbrain\phpcli\Options;
88817535bSAndreas Gohr
98817535bSAndreas Gohr
/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. It registers the sub commands
 * embed, similar, ask, chat, split, page and info and forwards each of them
 * to the plugin's helper component.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the helper component and register this CLI as logger for the embeddings
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('page', 'Check if chunks for a given page are available (for debugging)');
        $options->registerArgument('page', 'The page to check', true, 'page');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'page':
                $this->page($options->getArgs()[0]);
                break;
            case 'info':
                $this->showinfo();
                break;
            default:
                // no (known) command given: print the help screen
                echo $options->help();
        }
    }

    /**
     * Print the configured model and the statistics reported by the storage
     *
     * @return void
     */
    protected function showinfo()
    {
        echo 'model: ' . $this->getConf('model') . "\n";
        $stats = $this->helper->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }

        //echo $this->helper->getModel()->listUpstreamModels();
    }

    /**
     * Check chunk availability for a given page
     *
     * The page is looked up in the search index first. Its position in the index
     * multiplied by 100 is then passed to the storage together with the page ID.
     *
     * @param string $page
     * @return void
     */
    protected function page($page)
    {
        // normalize once and use the same ID for the index lookup and the storage query
        // NOTE(review): assumes the storage keys chunks by the cleaned page ID - confirm
        $id = cleanID($page);

        $indexer = new Indexer();
        // strict search: numeric looking IDs such as "0123" and "123" must not match each other
        $pos = array_search($id, $indexer->getPages(), true);

        if ($pos === false) {
            $this->error('Page not found');
            return;
        }

        $storage = $this->helper->getStorage();
        $chunks = $storage->getPageChunks($id, $pos * 100);
        if ($chunks) {
            $this->success('Found ' . count($chunks) . ' chunks');
        } else {
            $this->error('No chunks found');
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * Each chunk is followed by a colored separator line, the total number of
     * chunks is reported at the end.
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input ends. Each interpreted question
     * and its answer are appended to the history handed to the next question.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a plain truthiness check would end the session on the input "0"
        while (($q = $this->readLine('Your Question')) !== '') {
            $this->helper->getModel()->resetUsageStats();
            $result = $this->helper->askChatQuestion($q, $history);
            $this->colors->ptln("Interpretation: {$result['question']}", Colors::C_LIGHTPURPLE);
            $history[] = [$result['question'], $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Print the chunks that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        $this->printSources($sources);
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * Pages matching the skip regex (playground and sandbox namespaces/pages) are
     * passed to the index creation as pattern to skip. The peak memory usage is
     * reported when done.
     *
     * @param bool $clear value of the --clear option, handed on to the index creation
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        ini_set('memory_limit', '-1'); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Outputs the answer text, the sources it is based on and the usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        $this->printSources($answer['sources']);
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the given sources
     *
     * One line per chunk: page, chunk ID and score in parentheses.
     *
     * @param Chunk[] $sources
     * @return void
     */
    protected function printSources($sources)
    {
        foreach ($sources as $source) {
            /** @var Chunk $source */
            $this->colors->ptln(
                "\t" . $source->getPage() . ' ' . $source->getId() . ' (' . $source->getScore() . ')',
                Colors::C_LIGHTBLUE
            );
        }
    }

    /**
     * Print the usage statistics reported by the model
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to Model. Used {tokens} tokens for about ${cost}.',
            $this->helper->getModel()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered. When the
     * input ends (EOF, e.g. Ctrl-D or exhausted piped input) or stdin can not be
     * opened, an empty string is returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string when no more input is available
     */
    protected function readLine($prompt)
    {
        $fh = fopen('php://stdin', 'r');
        if ($fh === false) {
            return '';
        }

        try {
            do {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // EOF or read error: there will never be more input, so stop asking
                    return '';
                }
                $value = trim($line);
            } while ($value === '');

            return $value;
        } finally {
            fclose($fh);
        }
    }
}
2788817535bSAndreas Gohr
279