xref: /plugin/aichat/cli.php (revision bddd899cfd072ee6cc5aab9bb9d767fea8dd1115)
18817535bSAndreas Gohr<?php
28817535bSAndreas Gohr
3*bddd899cSAndreas Gohruse dokuwiki\plugin\aichat\backend\Chunk;
4c4584168SAndreas Gohruse splitbrain\phpcli\Colors;
58817535bSAndreas Gohruse splitbrain\phpcli\Options;
68817535bSAndreas Gohr
78817535bSAndreas Gohr
/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Command line front end for the aichat plugin. All real work is delegated to
 * the aichat helper plugin; this class only parses the command line and
 * prints results.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat extends \dokuwiki\Extension\CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /**
     * Load the aichat helper and register this CLI as logger of its embeddings object
     *
     * @param bool $autocatch passed through unchanged to the parent constructor
     */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        $this->helper->getEmbeddings()->setLogger($this);
    }

    /**
     * Register the available sub commands and their arguments
     *
     * @inheritDoc
     */
    protected function setup(Options $options)
    {
        $options->useCompactHelp();

        $options->setHelp('Manage and query the AI chatbot data');

        $options->registerCommand('embed', 'Create embeddings for all pages');

        $options->registerCommand('similar', 'Search for similar pages');
        $options->registerArgument('query', 'Look up chunks similar to this query', true, 'similar');

        $options->registerCommand('ask', 'Ask a question');
        $options->registerArgument('question', 'The question to ask', true, 'ask');

        $options->registerCommand('chat', 'Start an interactive chat session');

        $options->registerCommand('split', 'Split a page into chunks (for debugging)');
        $options->registerArgument('page', 'The page to split', true, 'split');

        $options->registerCommand('info', 'Get Info about the vector storage');
    }

    /**
     * Dispatch the selected sub command; print the help text when none matches
     *
     * @inheritDoc
     */
    protected function main(Options $options)
    {
        switch ($options->getCmd()) {
            case 'embed':
                $this->createEmbeddings();
                break;
            case 'similar':
                $this->similar($options->getArgs()[0]);
                break;
            case 'ask':
                $this->ask($options->getArgs()[0]);
                break;
            case 'chat':
                $this->chat();
                break;
            case 'split':
                $this->split($options->getArgs()[0]);
                break;
            case 'info':
                $this->treeinfo();
                break;
            default:
                echo $options->help();
        }
    }

    /**
     * Print the statistics reported by the storage backend, one "key: value" line each
     *
     * @return void
     */
    protected function treeinfo()
    {
        $stats = $this->helper->getEmbeddings()->getStorage()->statistics();
        foreach ($stats as $key => $value) {
            echo $key . ': ' . $value . "\n";
        }
    }

    /**
     * Split the given page into chunks and print them
     *
     * @param string $page
     * @return void
     * @throws Exception
     */
    protected function split($page)
    {
        $text = rawWiki($page);
        $chunks = $this->helper->getEmbeddings()->splitIntoChunks($text);
        foreach ($chunks as $chunk) {
            echo $chunk;
            echo "\n";
            $this->colors->ptln('--------------------------------', Colors::C_LIGHTPURPLE);
        }
        $this->success('Split into ' . count($chunks) . ' chunks');
    }

    /**
     * Interactive Chat Session
     *
     * Keeps asking for questions until the input stream ends. From the second
     * question on, the question is rephrased using the previous history before
     * it is sent to the helper.
     *
     * @return void
     * @throws Exception
     */
    protected function chat()
    {
        $history = [];
        // readLine() only returns '' when the input ended. Compare strictly so
        // that a falsy but valid input such as "0" does not end the session.
        while (($q = $this->readLine('Your Question')) !== '') {
            if ($history) {
                $question = $this->helper->rephraseChatQuestion($q, $history);
                $this->colors->ptln("Interpretation: $question", Colors::C_LIGHTPURPLE);
            } else {
                $question = $q;
            }
            $result = $this->helper->askQuestion($question);
            $history[] = [$q, $result['answer']];
            $this->printAnswer($result);
        }
    }

    /**
     * Print the given detailed answer in a nice way
     *
     * The answer text is printed first, followed by the page of each source chunk.
     *
     * @param array $answer needs the keys 'answer' and 'sources'
     * @return void
     */
    protected function printAnswer($answer)
    {
        $this->colors->ptln($answer['answer'], Colors::C_LIGHTCYAN);
        echo "\n";
        foreach ($answer['sources'] as $source) {
            /** @var Chunk $source */
            $this->colors->ptln("\t" . $source->getPage(), Colors::C_LIGHTBLUE);
        }
        echo "\n";
    }

    /**
     * Handle a single, standalone question
     *
     * @param string $query
     * @return void
     * @throws Exception
     */
    protected function ask($query)
    {
        $result = $this->helper->askQuestion($query);
        $this->printAnswer($result);
    }

    /**
     * Get the pages that are similar to the query
     *
     * @param string $query
     * @return void
     */
    protected function similar($query)
    {
        $sources = $this->helper->getEmbeddings()->getSimilarChunks($query);
        foreach ($sources as $source) {
            $this->colors->ptln($source->getPage(), Colors::C_LIGHTBLUE);
        }
    }

    /**
     * Recreate chunks and embeddings for all pages
     *
     * @return void
     * @todo make skip regex configurable
     */
    protected function createEmbeddings()
    {
        ini_set('memory_limit', '-1'); // we may need a lot of memory here
        $this->helper->getEmbeddings()->createNewIndex('/(^|:)(playground|sandbox)(:|$)/');
        $this->notice('Peak memory used: {memory}', ['memory' => filesize_h(memory_get_peak_usage(true))]);
    }

    /**
     * Interactively ask for a value from the user
     *
     * Blank lines are ignored and the prompt is shown again. When the input
     * stream ends (EOF, e.g. Ctrl-D or exhausted piped input) or cannot be read,
     * an empty string is returned instead of prompting forever.
     *
     * @param string $prompt
     * @return string the trimmed input, or '' when no more input is available
     */
    protected function readLine($prompt)
    {
        $value = '';

        while ($value === '') {
            echo $prompt;
            echo ': ';

            // Read from the already opened STDIN stream of the CLI SAPI. Opening
            // and closing php://stdin for every line throws away input that was
            // already buffered, which loses lines when the input is piped in.
            $line = fgets(STDIN);
            if ($line === false) {
                return '';
            }
            $value = trim($line);
        }

        return $value;
    }
}
2098817535bSAndreas Gohr
210