xref: /plugin/aichat/cli.php (revision 553920162c56b922c6ae5be71ff4442e666a63d3)
18817535bSAndreas Gohr<?php
28817535bSAndreas Gohr
3bddd899cSAndreas Gohruse dokuwiki\plugin\aichat\backend\Chunk;
4c4584168SAndreas Gohruse splitbrain\phpcli\Colors;
58817535bSAndreas Gohruse splitbrain\phpcli\Options;
68817535bSAndreas Gohr
78817535bSAndreas Gohr
/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. It offers sub commands to
 * build the embedding index, look up similar chunks, ask single questions,
 * run an interactive chat, debug the page splitter and print storage
 * statistics.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends \dokuwiki\Extension\CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the helper plugin and register this CLI as logger for the embeddings
     *
     * @param bool $autocatch passed through to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp(
            'Manage and query the AI chatbot data. Please note that calls to your LLM provider will be made. ' .
            'This may incur costs.'
        );

        $options->registerCommand(
            'embed',
            'Create embeddings for all pages. This skips pages that already have embeddings'
        );
        $options->registerOption(
            'clear',
            'Clear all existing embeddings before creating new ones',
            'c',
            false,
            'embed'
        );

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /** @inheritDoc */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings($options->getOpt('clear'));
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'info':
                $this->treeinfo();
                break;
            default:
                // no (or an unknown) command given: show the usage help
                echo $options->help();
        }
    }

    /**
     * Print the statistics reported by the embedding storage
     *
     * Each entry is printed as one "key: value" line.
     *
     * @return void
     */
    protected function treeinfo()
    {
        $stats = $this->helper->getEmbeddings()->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            // visual separator between two chunks
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends (e.g. Ctrl+D).
     * From the second question on, the question is rephrased using the
     * previous conversation before it is answered.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // compare against '' explicitly: a plain truthiness check would end the
        // session when the user enters "0"
        while (($q = $this->readLine('Your Question')) !== '') {
            // usage statistics are reported per question, not per session
            $this->helper->getOpenAI()->resetUsageStats();
            if ($history) {
                $question = $this->helper->rephraseChatQuestion($q, $history);
                $this->colors->ptln("Interpretation: $question", Colors::C_LIGHTPURPLE);
            } else {
                $question = $q;
            }
            $result = $this->helper->askQuestion($question);
            $history[] = [$q, $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        foreach ($sources as $source) {
            $this->colors->ptln($source->getPage(), Colors::C_LIGHTBLUE);
        }
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @param bool $clear Clear all existing embeddings before creating new ones
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings($clear)
    {
        // we may need a lot of memory here; ini_set() is documented with a string value
        ini_set('memory_limit', '-1');
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/', $clear);
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * Prints the answer text, one indented line per source page and finally
     * the usage statistics.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        foreach ($answer['sources'] as $source) {
            /** @var Chunk $source */
            $this->colors->ptln("\t" . $source->getPage(), Colors::C_LIGHTBLUE);
        }
        echo "\n";
        $this->printUsage();
    }

    /**
     * Print the usage statistics for OpenAI
     *
     * @return void
     */
    protected function printUsage()
    {
        $this->info(
            'Made {requests} requests in {time}s to OpenAI. Used {tokens} tokens for about ${cost}.',
            $this->helper->getOpenAI()->getUsageStats()
        );
    }

    /**
     * Interactively ask for a value from the user
     *
     * The prompt is repeated until a non-empty line has been entered. When the
     * input stream ends (EOF, e.g. Ctrl+D or exhausted piped input) or cannot be
     * read, an empty string is returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, empty string on EOF or read error
     */
    protected function readLine($prompt)
    {
        // Prefer the already open STDIN stream: reopening php://stdin for every
        // line can discard input that has been buffered but not yet consumed
        $ownsHandle = !defined('STDIN');
        $fh = $ownsHandle ? fopen('php://stdin', 'r') : STDIN;
        if ($fh === false) {
            return '';
        }

        try {
            do {
                echo $prompt;
                echo ': ';

                $line = fgets($fh);
                if ($line === false) {
                    // EOF or read error: give up instead of looping endlessly
                    return '';
                }
                $value = trim($line);
            } while ($value === '');

            return $value;
        } finally {
            // only close what we opened ourselves, never the shared STDIN stream
            if ($ownsHandle) {
                fclose($fh);
            }
        }
    }
}
2338817535bSAndreas Gohr
234