xref: /plugin/aichat/cli/simulate.php (revision 2071dced6f96936ea7b9bf5dbe8a117eef598448)
1c2b7a1f7SAndreas Gohr<?php
2c2b7a1f7SAndreas Gohr
3*2071dcedSAndreas Gohruse dokuwiki\Extension\CLIPlugin;
4c2b7a1f7SAndreas Gohruse dokuwiki\plugin\aichat\ModelFactory;
5c2b7a1f7SAndreas Gohruse splitbrain\phpcli\Colors;
6c2b7a1f7SAndreas Gohruse splitbrain\phpcli\Options;
7c2b7a1f7SAndreas Gohr
/**
 * DokuWiki Plugin aichat (CLI Component)
 *
 * Runs a prepared list of chat questions against every available chat model
 * (optionally narrowed by a regex filter) and writes the questions, rephrased
 * questions, sources, answers and usage statistics side by side into a CSV file.
 *
 * @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
 * @author  Andreas Gohr <gohr@cosmocode.de>
 */
class cli_plugin_aichat_simulate extends CLIPlugin
{
    /** @var helper_plugin_aichat */
    protected $helper;

    /** @inheritdoc */
    public function __construct($autocatch = true)
    {
        parent::__construct($autocatch);
        $this->helper = plugin_load('helper', 'aichat');
        // route the helper's log output through this CLI instance
        $this->helper->setLogger($this);
        $this->loadConfig();
    }


    /** @inheritDoc */
    protected function setup(Options $options)
    {
        $options->setHelp('Run a prepared chat session against multiple models');
        $options->registerArgument('input', 'A file with the chat questions. Each question separated by two newlines');
        $options->registerArgument('output', 'Where to write the result CSV to');

        $options->registerOption(
            'filter',
            'Use only models matching this case-insensitive regex (no delimiters)',
            'f',
            'regex'
        );
    }

    /**
     * Read the questions, run them against all selected models and write the CSV
     *
     * @inheritDoc
     * @throws Exception when the input cannot be read, the filter is not a valid regex,
     *                   no model is selected or the output file cannot be opened
     */
    protected function main(Options $options)
    {
        if ($this->loglevel['debug']['enabled']) {
            $this->helper->factory->setDebug(true);
        }

        [$input, $output] = $options->getArgs();
        $questions = $this->readInputFile($input);

        // Select the models before touching the output file, so that a bad filter
        // does not leave a truncated, empty result file behind.
        $models = $this->filterModels(
            $this->helper->factory->getModels(true, 'chat'),
            (string) $options->getOpt('filter')
        );
        if ($models === []) {
            throw new \Exception('No chat models available or none matching the given filter');
        }

        // Open the output before running the simulations: an unwritable target
        // should fail now, not after all models have been queried.
        $outfh = @fopen($output, 'w');
        if (!$outfh) throw new \Exception("Could not open $output for writing");

        try {
            $results = [];
            foreach ($models as $name => $info) {
                $this->success("Running on $name...");
                $results[$name] = $this->simulate($questions, $info);
            }

            foreach ($this->records2rows($results) as $row) {
                fputcsv($outfh, $row);
            }
        } finally {
            // always release the handle, even when a simulation throws
            fclose($outfh);
        }
        $this->success("Results written to $output");
    }

    /**
     * Reduce the given models to those whose name matches the filter
     *
     * @param iterable $models model name => model info
     * @param string $filter case-insensitive regex without delimiters, empty string keeps all models
     * @return array model name => model info
     * @throws Exception when the filter is not a valid regular expression
     */
    protected function filterModels(iterable $models, string $filter): array
    {
        $pattern = null;
        if ($filter !== '') {
            $pattern = '/' . $filter . '/i';
            // preg_match() returns false (and emits a warning) for a broken pattern;
            // without this check every model would be skipped silently
            if (@preg_match($pattern, '') === false) {
                throw new \Exception("Invalid filter regex: $filter");
            }
        }

        $selected = [];
        foreach ($models as $name => $info) {
            if ($pattern !== null && !preg_match($pattern, (string) $name)) {
                continue;
            }
            $selected[$name] = $info;
        }
        return $selected;
    }

    /**
     * Run all questions as one continuous chat session against a single model
     *
     * The given model replaces both the chat and the rephrase model of the helper's
     * factory. Usage statistics are reset before each question, so every record
     * carries the statistics of its own question only.
     *
     * @param string[] $questions the questions to ask, in order
     * @param array $model model info entry; its 'instance' key holds the model object
     * @return array[] one record per question (question, rephrased, answer, source.* and stats.* keys)
     */
    protected function simulate($questions, $model)
    {
        // override models; the rephrase model is a separate clone, presumably so its
        // usage statistics are tracked independently from the chat model
        $this->helper->factory->chatModel = $model['instance'];
        $this->helper->factory->rephraseModel = clone $model['instance'];

        $records = [];

        // previous [question, answer] pairs, passed along with each new question
        $history = [];
        foreach ($questions as $q) {
            $this->helper->getChatModel()->resetUsageStats();
            $this->helper->getRephraseModel()->resetUsageStats();
            $this->helper->getEmbeddingModel()->resetUsageStats();

            $this->colors->ptln($q, Colors::C_LIGHTPURPLE);
            $result = $this->helper->askChatQuestion($q, $history);
            $history[] = [$result['question'], $result['answer']];

            $record = [
                'question' => $q,
                'rephrased' => $result['question'],
                'answer' => $result['answer'],
                'source.list' => implode("\n", $result['sources']),
                'source.time' => $this->helper->getEmbeddings()->timeSpent,
                ...$this->flattenStats('stats.embedding', $this->helper->getEmbeddingModel()->getUsageStats()),
                ...$this->flattenStats('stats.rephrase', $this->helper->getRephraseModel()->getUsageStats()),
                ...$this->flattenStats('stats.chat', $this->helper->getChatModel()->getUsageStats()),
            ];
            $records[] = $record;
            $this->colors->ptln($result['answer'], Colors::C_LIGHTCYAN);
        }

        return $records;
    }

    /**
     * Reformat the result array into a CSV friendly array
     *
     * The first row is a header with three columns per model (model name, cost, time).
     * Each question then produces four rows (question, rephrased, sources, answer),
     * again with three cells per model.
     *
     * @param array $result model name => list of records as returned by simulate()
     * @return array[] list of CSV rows, empty when there are no results
     */
    protected function records2rows(array $result): array
    {
        // nothing to lay out; also avoids accessing a non-existing first model below
        if ($result === []) return [];

        // row type => record keys for the [value, cost, time] cells, '' means empty cell
        $rowkeys = [
            'question' => ['question', 'stats.embedding.cost', 'stats.embedding.time'],
            'rephrased' => ['rephrased', 'stats.rephrase.cost', 'stats.rephrase.time'],
            'sources' => ['source.list', '', 'source.time'],
            'answer' => ['answer', 'stats.chat.cost', 'stats.chat.time'],
        ];

        $models = array_keys($result);
        // every model was asked the same questions, so the first model defines the count
        $numberOfRecords = count($result[$models[0]]);
        $rows = [];

        // write headers
        $row = [];
        $row[] = 'type';
        foreach ($models as $model) {
            $row[] = $model;
            $row[] = 'Cost USD';
            $row[] = 'Time s';
        }
        $rows[] = $row;

        // write rows
        for ($i = 0; $i < $numberOfRecords; $i++) {
            foreach ($rowkeys as $type => $keys) {
                $row = [];
                $row[] = $type;
                foreach ($models as $model) {
                    foreach ($keys as $key) {
                        // a model may not report every statistic; keep the column layout intact
                        $row[] = $key === '' ? '' : ($result[$model][$i][$key] ?? '');
                    }
                }
                $rows[] = $row;
            }
        }

        return $rows;
    }


    /**
     * Prefix each key in the given stats array to be merged with a larger array
     *
     * @param string $prefix prefix to put in front of each key, joined with a dot
     * @param array $stats the stats to re-key
     * @return array the same values keyed as "$prefix.$key"
     */
    protected function flattenStats(string $prefix, array $stats)
    {
        $result = [];
        foreach ($stats as $key => $value) {
            $result["$prefix.$key"] = $value;
        }
        return $result;
    }

    /**
     * Read the questions from the given file
     *
     * Questions are separated by blank lines. Surrounding whitespace is removed and
     * empty entries (extra blank lines, trailing newlines) are dropped.
     *
     * @param string $file path to the input file
     * @return string[] the non-empty questions in file order
     * @throws Exception when the file is missing, unreadable or contains no question
     */
    protected function readInputFile(string $file): array
    {
        if (!file_exists($file)) throw new \Exception("File not found: $file");

        $content = file_get_contents($file);
        if ($content === false) throw new \Exception("Could not read file: $file");

        // normalize Windows/old Mac line endings, otherwise "\n\n" never matches
        $content = str_replace(["\r\n", "\r"], "\n", $content);

        $questions = array_map(trim(...), explode("\n\n", $content));
        $questions = array_values(
            array_filter($questions, static fn(string $question): bool => $question !== '')
        );

        if ($questions === []) throw new \Exception("No questions found in $file");
        return $questions;
    }
}
188