xref: /plugin/dokullm/cli.php (revision fdbf4cdb106502d4760e771d7d4a977a7d28206d)
1*fdbf4cdbSCostin Stroie<?php
2*fdbf4cdbSCostin Stroie
3*fdbf4cdbSCostin Stroieuse dokuwiki\Extension\CLIPlugin;
4*fdbf4cdbSCostin Stroieuse splitbrain\phpcli\Options;
5*fdbf4cdbSCostin Stroie
// Allow the file to be loaded outside a full DokuWiki bootstrap: when DOKU_INC
// is not defined yet, point it three directory levels above this file.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../../../') . '/');
}
7*fdbf4cdbSCostin Stroie
/**
 * DokuWiki CLI plugin for ChromaDB operations
 *
 * Registers the sub-commands `send`, `query`, `heartbeat`, `identity`, `list`
 * and `get`. Each sub-command builds a ChromaDBClient from the global
 * connection options and reports its outcome through the CLI logger
 * (info/success/error).
 */
class cli_plugin_dokullm_chromadb extends CLIPlugin {

    /** Collection name used when none can be derived from a document ID */
    const DEFAULT_COLLECTION = 'documents';

    /** Maximum number of characters of a document shown in query results */
    const SNIPPET_LENGTH = 255;

    /**
     * Register options and arguments
     *
     * Options::registerOption() takes ($long, $help, $short, $needsarg, $command);
     * it has no default-value parameter. Defaults are therefore only mentioned
     * in the help texts here and applied through getOpt() in main().
     *
     * @param Options $options
     */
    protected function setup(Options $options) {
        // Set help text
        $options->setHelp(
            "ChromaDB CLI plugin for DokuWiki\n\n" .
            "Usage: ./bin/plugin.php dokullm_chromadb [action] [options]\n\n" .
            "Actions:\n" .
            "  send       Send a file or directory to ChromaDB\n" .
            "  query      Query ChromaDB\n" .
            "  heartbeat  Check if ChromaDB server is alive\n" .
            "  identity   Get authentication and identity information\n" .
            "  list       List all collections\n" .
            "  get        Get a document by its ID\n"
        );

        // Global options.
        // --host has no short form: -h is used by the CLI framework for --help.
        $options->registerOption('host', 'ChromaDB server host (default: localhost)', null, 'host');
        $options->registerOption('port', 'ChromaDB server port (default: 8000)', 'p', 'port');
        $options->registerOption('tenant', 'ChromaDB tenant (default: default_tenant)', null, 'tenant');
        $options->registerOption('database', 'ChromaDB database (default: default_database)', null, 'database');
        $options->registerOption('ollama-host', 'Ollama server host (default: localhost)', null, 'ollama-host');
        $options->registerOption('ollama-port', 'Ollama server port (default: 11434)', null, 'ollama-port');
        $options->registerOption('ollama-model', 'Ollama embeddings model (default: nomic-embed-text)', null, 'ollama-model');
        $options->registerOption('verbose', 'Enable verbose output', 'v');

        // Action-specific options (a command must be registered before its options)
        $options->registerCommand('send', 'Send a file or directory to ChromaDB');
        $options->registerArgument('path', 'File or directory path', true, 'send');

        $options->registerCommand('query', 'Query ChromaDB');
        $options->registerOption('collection', 'Collection name to query (default: documents)', 'c', 'collection', 'query');
        $options->registerOption('limit', 'Number of results to return (default: 5)', 'l', 'limit', 'query');
        $options->registerArgument('search', 'Search terms', true, 'query');

        $options->registerCommand('heartbeat', 'Check if ChromaDB server is alive');

        $options->registerCommand('identity', 'Get authentication and identity information');

        $options->registerCommand('list', 'List all collections');

        $options->registerCommand('get', 'Get a document by its ID');
        $options->registerOption('collection', 'Collection name (default: first part of the document ID)', 'c', 'collection', 'get');
        $options->registerArgument('id', 'Document ID', true, 'get');
    }

    /**
     * Main plugin logic
     *
     * Reads the global connection options (falling back to their defaults) and
     * dispatches to the handler of the selected sub-command. Prints the help
     * screen and exits with status 1 when no known sub-command was given.
     *
     * @param Options $options
     */
    protected function main(Options $options) {
        // Include the ChromaDBClient class
        require_once __DIR__ . '/ChromaDBClient.php';

        $action = $options->getCmd();
        $verbose = (bool)$options->getOpt('verbose');

        // Get global options with defaults
        $host = $options->getOpt('host', 'localhost');
        $port = (int)$options->getOpt('port', 8000);
        $tenant = $options->getOpt('tenant', 'default_tenant');
        $database = $options->getOpt('database', 'default_database');
        $ollamaHost = $options->getOpt('ollama-host', 'localhost');
        $ollamaPort = (int)$options->getOpt('ollama-port', 11434);
        $ollamaModel = $options->getOpt('ollama-model', 'nomic-embed-text');

        switch ($action) {
            case 'send':
                $path = $this->requireFirstArgument($options, 'Missing file path for send action');
                $this->sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'query':
                $searchTerms = $this->requireFirstArgument($options, 'Missing search terms for query action');
                $collection = $options->getOpt('collection', self::DEFAULT_COLLECTION);
                $limit = (int)$options->getOpt('limit', 5);
                if ($limit < 1) {
                    // a non-numeric or non-positive --limit would otherwise be sent to the server as-is
                    $this->fatal('Limit must be a positive integer');
                }
                $this->queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'heartbeat':
                $this->checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'identity':
                $this->checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'list':
                $this->listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'get':
                $documentId = $this->requireFirstArgument($options, 'Missing document ID for get action');
                // null lets getDocument() derive the collection from the document ID
                $collection = $options->getOpt('collection', null);
                $this->getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            default:
                echo $options->help();
                exit(1);
        }
    }

    /**
     * Return the first positional argument or abort with the given message
     *
     * Compares against the empty string instead of using a truthiness test so
     * that the literal argument "0" is accepted.
     *
     * @param Options $options
     * @param string $errorMessage message passed to fatal() when the argument is missing
     * @return string
     */
    private function requireFirstArgument(Options $options, $errorMessage) {
        $args = $options->getArgs();
        $value = isset($args[0]) ? (string)$args[0] : '';
        if ($value === '') {
            $this->fatal($errorMessage);
        }
        return $value;
    }

    /**
     * Build a ChromaDB client for the given connection settings
     *
     * @return \dokuwiki\plugin\dokullm\ChromaDBClient
     */
    private function createClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel) {
        return new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel
        );
    }

    /**
     * Derive the collection name from a document ID
     *
     * Uses the part of the ID before the first colon and falls back to the
     * default collection when that part is empty (same empty() test as before,
     * so the values '' and '0' both fall back).
     *
     * @param string $id document ID, parts separated by ':'
     * @return string
     */
    private function collectionFromId($id) {
        $idParts = explode(':', (string)$id);
        return empty($idParts[0]) ? self::DEFAULT_COLLECTION : $idParts[0];
    }

    /**
     * Print the connection banner shared by several commands
     *
     * @param string|null $collection also printed when not null
     */
    private function printConnectionInfo($host, $port, $tenant, $database, $collection = null) {
        $this->info("Host: $host:$port");
        $this->info("Tenant: $tenant");
        $this->info("Database: $database");
        if ($collection !== null) {
            $this->info("Collection: $collection");
        }
        $this->info("==========================================");
    }

    /**
     * Shorten a text for display
     *
     * Cuts on character boundaries when the mbstring extension is available
     * (falls back to byte-wise cutting otherwise) and appends "..." only when
     * something was actually cut off.
     *
     * @param mixed $text
     * @param int $maxLength
     * @return string
     */
    private function snippet($text, $maxLength = self::SNIPPET_LENGTH) {
        $text = (string)$text;
        if (function_exists('mb_substr')) {
            if (mb_strlen($text, 'UTF-8') <= $maxLength) {
                return $text;
            }
            return mb_substr($text, 0, $maxLength, 'UTF-8') . '...';
        }
        if (strlen($text) <= $maxLength) {
            return $text;
        }
        return substr($text, 0, $maxLength) . '...';
    }

    /**
     * Send a file or directory of files to ChromaDB
     *
     * Directories are handed to processDirectory(). A single file is skipped
     * when its name starts with an underscore and reported as an error when it
     * does not exist.
     */
    private function sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, self::DEFAULT_COLLECTION, $ollamaHost, $ollamaPort, $ollamaModel);

        if (is_dir($path)) {
            $this->processDirectory($path, $chroma, $host, $port, $tenant, $database, $verbose);
            return;
        }

        if (!file_exists($path)) {
            $this->error("File does not exist: $path");
            return;
        }

        // Skip files that start with underscore
        $filename = basename($path);
        if ($filename !== '' && $filename[0] === '_') {
            if ($verbose) {
                $this->info("Skipping file (starts with underscore): $path");
            }
            return;
        }

        $this->processSingleFile($path, $chroma, $host, $port, $tenant, $database, false, $verbose);
    }

    /**
     * Process a single DokuWiki file and send it to ChromaDB
     *
     * The collection is derived from the first part of the document ID. The
     * file is refused when auth_quickaclcheck() reports less than AUTH_READ
     * for the cleaned ID.
     */
    private function processSingleFile($filePath, $chroma, $host, $port, $tenant, $database, $collectionChecked = false, $verbose = false) {
        // Parse file path to extract the document ID
        $id = \dokuwiki\plugin\dokullm\parseFilePath($filePath);
        $collectionName = $this->collectionFromId($id);

        // Clean the ID and check ACL
        if (auth_quickaclcheck(cleanID($id)) < AUTH_READ) {
            $this->error("You are not allowed to read this file: $id");
            return;
        }

        $this->submitFile(
            $filePath, $chroma, $collectionName, $collectionChecked,
            $host, $port, $tenant, $database, $verbose,
            "Error sending file to ChromaDB: "
        );
    }

    /**
     * Hand one file to the client and report the outcome
     *
     * @param string $errorPrefix text placed before the exception message when the client throws
     * @return string 'success', 'skipped' or 'error'
     */
    private function submitFile($filePath, $chroma, $collectionName, $collectionChecked, $host, $port, $tenant, $database, $verbose, $errorPrefix) {
        try {
            $result = $chroma->processSingleFile($filePath, $collectionName, $collectionChecked);
        } catch (Exception $e) {
            $this->error($errorPrefix . $e->getMessage());
            return 'error';
        }
        return $this->reportFileResult($result, $filePath, $host, $port, $tenant, $database, $verbose);
    }

    /**
     * Log the result array returned by the client for one file
     *
     * Reads the keys 'status', 'message', 'collection_status' and 'details'
     * ('chunks', 'document_id', 'collection'); missing keys are tolerated.
     *
     * @param mixed $result value returned by ChromaDBClient::processSingleFile()
     * @return string 'success', 'skipped' or 'error'
     */
    private function reportFileResult($result, $filePath, $host, $port, $tenant, $database, $verbose) {
        if (!is_array($result)) {
            $result = [];
        }

        if ($verbose && !empty($result['collection_status'])) {
            $this->info($result['collection_status']);
        }

        $status = $result['status'] ?? null;
        $message = $result['message'] ?? '';
        $details = (isset($result['details']) && is_array($result['details'])) ? $result['details'] : [];

        switch ($status) {
            case 'success':
                $chunks = $details['chunks'] ?? 'n/a';
                if ($verbose) {
                    $this->info("Adding " . $chunks . " chunks to ChromaDB...");
                }
                $this->success("Successfully sent file to ChromaDB:");
                $this->info("  Document ID: " . ($details['document_id'] ?? 'n/a'));
                if ($verbose) {
                    $this->info("  Chunks: " . $chunks);
                    $this->info("  Host: $host:$port");
                    $this->info("  Tenant: $tenant");
                    $this->info("  Database: $database");
                    $this->info("  Collection: " . ($details['collection'] ?? 'n/a'));
                }
                return 'success';

            case 'skipped':
                if ($verbose) {
                    $this->info($message);
                }
                return 'skipped';

            case 'error':
                $this->error($message);
                return 'error';

            default:
                // a result without a known status used to be dropped silently
                $this->error("Unexpected result while processing file: $filePath");
                return 'error';
        }
    }

    /**
     * Make sure a collection exists, logging the outcome
     *
     * @return bool true when ensureCollectionExists() succeeded, false when it threw
     */
    private function ensureCollection($chroma, $collectionName, $verbose) {
        try {
            $collectionStatus = $chroma->ensureCollectionExists($collectionName);
            if ($verbose) {
                $this->info($collectionStatus);
            }
            return true;
        } catch (Exception $e) {
            $this->error("Error ensuring collection '$collectionName' exists: " . $e->getMessage());
            return false;
        }
    }

    /**
     * Process all DokuWiki files in a directory and send them to ChromaDB
     *
     * Walks the directory recursively, picks every .txt file whose name does
     * not start with an underscore and sends each one to the collection
     * derived from that file's own document ID. Every collection is ensured
     * once; when that fails the file is still submitted with the "collection
     * checked" flag off (presumably this lets the client retry the check, as
     * it does for single-file sends -- verify against ChromaDBClient).
     *
     * NOTE(review): unlike processSingleFile() this path performs no ACL
     * check; kept as is, confirm whether that is intended.
     */
    private function processDirectory($dirPath, $chroma, $host, $port, $tenant, $database, $verbose = false) {
        if ($verbose) {
            $this->info("Processing directory: $dirPath");
        }

        // Check if directory exists
        if (!is_dir($dirPath)) {
            $this->error("Directory does not exist: $dirPath");
            return;
        }

        $files = [];
        try {
            $iterator = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator($dirPath, RecursiveDirectoryIterator::SKIP_DOTS),
                RecursiveIteratorIterator::LEAVES_ONLY
            );
            foreach ($iterator as $file) {
                $name = $file->getFilename();
                // Process only .txt files that don't start with underscore
                if ($file->isFile() && $file->getExtension() === 'txt' && $name !== '' && $name[0] !== '_') {
                    $files[] = $file->getPathname();
                }
            }
        } catch (Exception $e) {
            // e.g. an unreadable sub-directory
            $this->error("Error reading directory $dirPath: " . $e->getMessage());
            return;
        }

        // Skip if no files
        if (empty($files)) {
            if ($verbose) {
                $this->info("No .txt files found in directory: $dirPath");
            }
            return;
        }

        // Directory iteration order is filesystem dependent; sort for reproducible runs
        sort($files);

        if ($verbose) {
            $this->info("Found " . count($files) . " files to process.");
        }

        $counts = ['success' => 0, 'skipped' => 0, 'error' => 0];
        $ensured = []; // collection name => whether ensureCollectionExists() succeeded

        foreach ($files as $file) {
            if ($verbose) {
                $this->info("\nProcessing file: $file");
            }

            try {
                // Use the first part of the document ID as collection name, fallback to 'documents'
                $collectionName = $this->collectionFromId(\dokuwiki\plugin\dokullm\parseFilePath($file));
            } catch (Exception $e) {
                $counts['error']++;
                $this->error("Error processing file $file: " . $e->getMessage());
                continue;
            }

            if (!array_key_exists($collectionName, $ensured)) {
                $ensured[$collectionName] = $this->ensureCollection($chroma, $collectionName, $verbose);
            }

            $status = $this->submitFile(
                $file, $chroma, $collectionName, $ensured[$collectionName],
                $host, $port, $tenant, $database, $verbose,
                "Error processing file $file: "
            );
            $counts[$status]++;
        }

        $processedCount = $counts['success'];
        $skippedCount = $counts['skipped'];
        $errorCount = $counts['error'];

        if ($verbose) {
            $this->info("\nFinished processing directory.");
            $this->info("Processing summary:");
            $this->info("  Processed: $processedCount files");
            $this->info("  Skipped: $skippedCount files");
            $this->info("  Errors: $errorCount files");
            return;
        }

        // Even in non-verbose mode, show summary stats, but only the non-zero counters
        if ($processedCount > 0 || $skippedCount > 0 || $errorCount > 0) {
            $this->info("Processing summary:");
            if ($processedCount > 0) {
                $this->info("  Processed: $processedCount files");
            }
            if ($skippedCount > 0) {
                $this->info("  Skipped: $skippedCount files");
            }
            if ($errorCount > 0) {
                $this->info("  Errors: $errorCount files");
            }
        }
    }

    /**
     * Query ChromaDB for similar documents
     *
     * Prints ID, distance, a shortened document text and (when present) the
     * metadata of every hit in the first result set.
     */
    private function queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            // Query the specified collection with a single query text
            $results = $chroma->queryCollection($collection, [$searchTerms], $limit);

            $this->info("Query results for: \"$searchTerms\"");
            $this->printConnectionInfo($host, $port, $tenant, $database, $collection);

            if (empty($results['ids'][0])) {
                $this->info("No results found.");
                return;
            }

            // Result arrays are indexed [query][hit]; only one query text was sent
            $position = 0;
            foreach ($results['ids'][0] as $i => $id) {
                $position++;
                $this->info("Result " . $position . ":");
                $this->info("  ID: " . $id);
                $this->info("  Distance: " . ($results['distances'][0][$i] ?? 'n/a'));
                $this->info("  Document: " . $this->snippet($results['documents'][0][$i] ?? ''));

                if (isset($results['metadatas'][0][$i])) {
                    $this->info("  Metadata: " . json_encode($results['metadatas'][0][$i]));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error querying ChromaDB: " . $e->getMessage());
            return;
        }
    }

    /**
     * Check if the ChromaDB server is alive
     */
    private function checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, self::DEFAULT_COLLECTION, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB server status...");
                $this->printConnectionInfo($host, $port, $tenant, $database);
            }

            $result = $chroma->heartbeat();

            $this->success("Server is alive!");
            $this->info("Response: " . json_encode($result));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB server status: " . $e->getMessage());
            return;
        }
    }

    /**
     * Get authentication and identity information from ChromaDB
     */
    private function checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, self::DEFAULT_COLLECTION, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB identity...");
                $this->printConnectionInfo($host, $port, $tenant, $database);
            }

            $result = $chroma->getIdentity();

            $this->info("Identity information:");
            $this->info("Response: " . json_encode($result, JSON_PRETTY_PRINT));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB identity: " . $e->getMessage());
            return;
        }
    }

    /**
     * List all collections in the ChromaDB database
     *
     * Prints the 'name' of every entry, or the JSON-encoded entry when it has
     * no 'name' key.
     */
    private function listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, self::DEFAULT_COLLECTION, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            if ($verbose) {
                $this->info("Listing ChromaDB collections...");
                $this->printConnectionInfo($host, $port, $tenant, $database);
            }

            $result = $chroma->listCollections();

            if (empty($result)) {
                $this->info("No collections found.");
                return;
            }

            $this->info("Collections:");
            foreach ($result as $collection) {
                $this->info("  - " . (isset($collection['name']) ? $collection['name'] : json_encode($collection)));
            }
        } catch (Exception $e) {
            $this->error("Error listing ChromaDB collections: " . $e->getMessage());
            return;
        }
    }

    /**
     * Get a document by its ID from ChromaDB
     *
     * When no collection is given it is derived from the first part of the
     * document ID. Prints ID, content and metadata of every returned entry.
     */
    private function getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // If no collection specified, derive it from the first part of the document ID
        if (empty($collection)) {
            $collection = $this->collectionFromId($documentId);
        }

        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            // Get the specified document by ID
            $results = $chroma->getDocument($collection, $documentId);

            if ($verbose) {
                $this->info("Document retrieval results for: \"$documentId\"");
                $this->printConnectionInfo($host, $port, $tenant, $database, $collection);
            }

            if (empty($results['ids'])) {
                $this->info("No document found with ID: $documentId");
                return;
            }

            // Unlike query results, these arrays are flat: one entry per returned item
            $position = 0;
            foreach ($results['ids'] as $i => $id) {
                $position++;
                $this->info("Document " . $position . ":");
                $this->info("  ID: " . $id);

                if (isset($results['documents'][$i])) {
                    $this->info("  Content: " . $results['documents'][$i]);
                }

                if (isset($results['metadatas'][$i])) {
                    $this->info("  Metadata: " . json_encode($results['metadatas'][$i], JSON_PRETTY_PRINT));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error retrieving document from ChromaDB: " . $e->getMessage());
            return;
        }
    }
}
525