xref: /plugin/dokullm/cli.php (revision 17e138999dac575a86cf345a102e785801382874)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use splitbrain\phpcli\Options;
5
// Define DOKU_INC as the directory three levels above this file, unless it is already defined.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../../../') . '/');
}
7
8/**
9 * DokuWiki CLI plugin for ChromaDB operations
10 */
11class cli_plugin_dokullm extends CLIPlugin {
12
13    /**
14     * Register options and arguments
15     *
16     * @param Options $options
17     */
18    protected function setup(Options $options) {
19        // Set help text
20        $options->setHelp(
21            "ChromaDB CLI plugin for DokuLLM\n\n" .
22            "Usage: ./bin/plugin.php dokullm [action] [options]\n\n" .
23            "Actions:\n" .
24            "  send       Send a file or directory to ChromaDB\n" .
25            "  query      Query ChromaDB\n" .
26            "  heartbeat  Check if ChromaDB server is alive\n" .
27            "  identity   Get authentication and identity information\n" .
28            "  list       List all collections\n" .
29            "  get        Get a document by its ID\n"
30        );
31
32        // Global options
33        $options->registerOption('verbose', 'Enable verbose output', 'v');
34
35        // Action-specific options
36        $options->registerCommand('send', 'Send a file or directory to ChromaDB');
37        $options->registerArgument('path', 'File or directory path', true, 'send');
38
39        $options->registerCommand('query', 'Query ChromaDB');
40        //$options->registerOption('collection', 'Collection name to query', 'c', 'collection', 'documents', 'query');
41        //$options->registerOption('limit', 'Number of results to return', 'l', 'limit', '5', 'query');
42        $options->registerArgument('search', 'Search terms', true, 'query');
43
44        $options->registerCommand('heartbeat', 'Check if ChromaDB server is alive');
45
46        $options->registerCommand('identity', 'Get authentication and identity information');
47
48        $options->registerCommand('list', 'List all collections');
49
50        $options->registerCommand('get', 'Get a document by its ID');
51        //$options->registerOption('collection', 'Collection name', 'c', 'collection', 'documents', 'get');
52        $options->registerArgument('id', 'Document ID', true, 'get');
53    }
54
55    /**
56     * Main plugin logic
57     *
58     * @param Options $options
59     */
60    protected function main(Options $options) {
61        // Include the ChromaDBClient class
62        require_once dirname(__FILE__) . '/ChromaDBClient.php';
63
64        // Get values from DokuWiki settings
65        $host = $this->getConf('chroma_host');
66        $port = (int)$this->getConf('chroma_port');
67        $tenant = $this->getConf('chroma_tenant');
68        $database = $this->getConf('chroma_database');
69        $ollamaHost = $this->getConf('ollama_host');
70        $ollamaPort = (int)$this->getConf('ollama_port');
71        $ollamaModel = $this->getConf('ollama_model');
72        $verbose = $options->getOpt('verbose');
73
74        $action = $options->getCmd();
75
76        switch ($action) {
77            case 'send':
78                $path = $options->getArgs()[0] ?? null;
79                if (!$path) {
80                    $this->fatal('Missing file path for send action');
81                }
82                $this->sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
83                break;
84
85            case 'query':
86                $searchTerms = $options->getArgs()[0] ?? null;
87                if (!$searchTerms) {
88                    $this->fatal('Missing search terms for query action');
89                }
90                $collection = $options->getOpt('collection', 'documents');
91                $limit = (int)$options->getOpt('limit', 5);
92                $this->queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
93                break;
94
95            case 'heartbeat':
96                $this->checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
97                break;
98
99            case 'identity':
100                $this->checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
101                break;
102
103            case 'list':
104                $this->listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
105                break;
106
107            case 'get':
108                $documentId = $options->getArgs()[0] ?? null;
109                if (!$documentId) {
110                    $this->fatal('Missing document ID for get action');
111                }
112                $collection = $options->getOpt('collection', null);
113                $this->getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
114                break;
115
116            default:
117                echo $options->help();
118                exit(1);
119        }
120    }
121
122    /**
123     * Send a file or directory of files to ChromaDB
124     */
125    private function sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
126        // Create ChromaDB client
127        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);
128
129        if (is_dir($path)) {
130            // Process directory
131            $this->processDirectory($path, $chroma, $host, $port, $tenant, $database, $verbose);
132        } else {
133            // Process single file
134            if (!file_exists($path)) {
135                $this->error("File does not exist: $path");
136                return;
137            }
138
139            // Skip files that start with underscore
140            $filename = basename($path);
141            if ($filename[0] === '_') {
142                if ($verbose) {
143                    $this->info("Skipping file (starts with underscore): $path");
144                }
145                return;
146            }
147
148            $this->processSingleFile($path, $chroma, $host, $port, $tenant, $database, false, $verbose);
149        }
150    }
151
152    /**
153     * Process a single DokuWiki file and send it to ChromaDB
154     */
155    private function processSingleFile($filePath, $chroma, $host, $port, $tenant, $database, $collectionChecked = false, $verbose = false) {
156        // Parse file path to extract metadata
157        $id = \dokuwiki\plugin\dokullm\parseFilePath($filePath);
158
159        // Use the first part of the document ID as collection name, fallback to 'documents'
160        $idParts = explode(':', $id);
161        $collectionName = isset($idParts[0]) && !empty($idParts[0]) ? $idParts[0] : 'documents';
162
163        // Clean the ID and check ACL
164        $cleanId = cleanID($id);
165        //if (auth_quickaclcheck($cleanId) < AUTH_READ) {
166        //    $this->error("You are not allowed to read this file: $id");
167        //    return;
168        //}
169
170        try {
171            // Process the file using the class method
172            $result = $chroma->processSingleFile($filePath, $collectionName, $collectionChecked);
173
174            // Handle the result with verbose output
175            if ($verbose && !empty($result['collection_status'])) {
176                $this->info($result['collection_status']);
177            }
178
179            switch ($result['status']) {
180                case 'success':
181                    if ($verbose) {
182                        $this->info("Adding " . $result['details']['chunks'] . " chunks to ChromaDB...");
183                    }
184                    $this->success("Successfully sent file to ChromaDB:");
185                    $this->info("  Document ID: " . $result['details']['document_id']);
186                    if ($verbose) {
187                        $this->info("  Chunks: " . $result['details']['chunks']);
188                        $this->info("  Host: $host:$port");
189                        $this->info("  Tenant: $tenant");
190                        $this->info("  Database: $database");
191                        $this->info("  Collection: " . $result['details']['collection']);
192                    }
193                    break;
194
195                case 'skipped':
196                    if ($verbose) {
197                        $this->info($result['message']);
198                    }
199                    break;
200
201                case 'error':
202                    $this->error($result['message']);
203                    break;
204            }
205        } catch (Exception $e) {
206            $this->error("Error sending file to ChromaDB: " . $e->getMessage());
207            return;
208        }
209    }
210
211    /**
212     * Process all DokuWiki files in a directory and send them to ChromaDB
213     */
214    private function processDirectory($dirPath, $chroma, $host, $port, $tenant, $database, $verbose = false) {
215        if ($verbose) {
216            $this->info("Processing directory: $dirPath");
217        }
218
219        // Check if directory exists
220        if (!is_dir($dirPath)) {
221            $this->error("Directory does not exist: $dirPath");
222            return;
223        }
224
225        // Create RecursiveIteratorIterator to process directories recursively
226        $iterator = new RecursiveIteratorIterator(
227            new RecursiveDirectoryIterator($dirPath, RecursiveDirectoryIterator::SKIP_DOTS),
228            RecursiveIteratorIterator::LEAVES_ONLY
229        );
230
231        $files = [];
232        foreach ($iterator as $file) {
233            // Process only .txt files that don't start with underscore
234            if ($file->isFile() && $file->getExtension() === 'txt' && $file->getFilename()[0] !== '_') {
235                $files[] = $file->getPathname();
236            }
237        }
238
239        // Skip if no files
240        if (empty($files)) {
241            if ($verbose) {
242                $this->info("No .txt files found in directory: $dirPath");
243            }
244            return;
245        }
246
247        if ($verbose) {
248            $this->info("Found " . count($files) . " files to process.");
249        }
250
251        // Use the first part of the document ID as collection name, fallback to 'documents'
252        $sampleFile = $files[0];
253        $id = \dokuwiki\plugin\dokullm\parseFilePath($sampleFile);
254        $idParts = explode(':', $id);
255        $collectionName = isset($idParts[0]) && !empty($idParts[0]) ? $idParts[0] : 'documents';
256
257        try {
258            $collectionStatus = $chroma->ensureCollectionExists($collectionName);
259            if ($verbose) {
260                $this->info($collectionStatus);
261            }
262            $collectionChecked = true;
263        } catch (Exception $e) {
264            $collectionChecked = true;
265        }
266
267        // Process each file
268        $processedCount = 0;
269        $skippedCount = 0;
270        $errorCount = 0;
271
272        foreach ($files as $file) {
273            if ($verbose) {
274                $this->info("\nProcessing file: $file");
275            }
276
277            try {
278                $result = $chroma->processSingleFile($file, $collectionName, $collectionChecked);
279
280                // Handle the result with verbose output
281                if ($verbose && !empty($result['collection_status'])) {
282                    $this->info($result['collection_status']);
283                }
284
285                switch ($result['status']) {
286                    case 'success':
287                        $processedCount++;
288                        if ($verbose) {
289                            $this->info("Adding " . $result['details']['chunks'] . " chunks to ChromaDB...");
290                        }
291                        $this->success("Successfully sent file to ChromaDB:");
292                        $this->info("  Document ID: " . $result['details']['document_id']);
293                        if ($verbose) {
294                            $this->info("  Chunks: " . $result['details']['chunks']);
295                            $this->info("  Host: $host:$port");
296                            $this->info("  Tenant: $tenant");
297                            $this->info("  Database: $database");
298                            $this->info("  Collection: " . $result['details']['collection']);
299                        }
300                        break;
301
302                    case 'skipped':
303                        $skippedCount++;
304                        if ($verbose) {
305                            $this->info($result['message']);
306                        }
307                        break;
308
309                    case 'error':
310                        $errorCount++;
311                        $this->error($result['message']);
312                        break;
313                }
314            } catch (Exception $e) {
315                $errorCount++;
316                $this->error("Error processing file $file: " . $e->getMessage());
317            }
318        }
319
320        if ($verbose) {
321            $this->info("\nFinished processing directory.");
322            $this->info("Processing summary:");
323            $this->info("  Processed: $processedCount files");
324            $this->info("  Skipped: $skippedCount files");
325            $this->info("  Errors: $errorCount files");
326        } else {
327            // Even in non-verbose mode, show summary stats if there were processed files
328            if ($processedCount > 0 || $skippedCount > 0 || $errorCount > 0) {
329                $this->info("Processing summary:");
330                if ($processedCount > 0) {
331                    $this->info("  Processed: $processedCount files");
332                }
333                if ($skippedCount > 0) {
334                    $this->info("  Skipped: $skippedCount files");
335                }
336                if ($errorCount > 0) {
337                    $this->info("  Errors: $errorCount files");
338                }
339            }
340        }
341    }
342
343    /**
344     * Query ChromaDB for similar documents
345     */
346    private function queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
347        // Create ChromaDB client
348        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);
349
350        try {
351            // Query the specified collection by collection
352            $results = $chroma->queryCollection($collection, [$searchTerms], $limit);
353
354            $this->info("Query results for: \"$searchTerms\"");
355            $this->info("Host: $host:$port");
356            $this->info("Tenant: $tenant");
357            $this->info("Database: $database");
358            $this->info("Collection: $collection");
359            $this->info("==========================================");
360
361            if (empty($results['ids'][0])) {
362                $this->info("No results found.");
363                return;
364            }
365
366            for ($i = 0; $i < count($results['ids'][0]); $i++) {
367                $this->info("Result " . ($i + 1) . ":");
368                $this->info("  ID: " . $results['ids'][0][$i]);
369                $this->info("  Distance: " . $results['distances'][0][$i]);
370                $this->info("  Document: " . substr($results['documents'][0][$i], 0, 255) . "...");
371
372                if (isset($results['metadatas'][0][$i])) {
373                    $this->info("  Metadata: " . json_encode($results['metadatas'][0][$i]));
374                }
375                $this->info("");
376            }
377        } catch (Exception $e) {
378            $this->error("Error querying ChromaDB: " . $e->getMessage());
379            return;
380        }
381    }
382
383    /**
384     * Check if the ChromaDB server is alive
385     */
386    private function checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
387        // Create ChromaDB client
388        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);
389
390        try {
391            if ($verbose) {
392                $this->info("Checking ChromaDB server status...");
393                $this->info("Host: $host:$port");
394                $this->info("Tenant: $tenant");
395                $this->info("Database: $database");
396                $this->info("==========================================");
397            }
398
399            $result = $chroma->heartbeat();
400
401            $this->success("Server is alive!");
402            $this->info("Response: " . json_encode($result));
403        } catch (Exception $e) {
404            $this->error("Error checking ChromaDB server status: " . $e->getMessage());
405            return;
406        }
407    }
408
409    /**
410     * Get authentication and identity information from ChromaDB
411     */
412    private function checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
413        // Create ChromaDB client
414        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);
415
416        try {
417            if ($verbose) {
418                $this->info("Checking ChromaDB identity...");
419                $this->info("Host: $host:$port");
420                $this->info("Tenant: $tenant");
421                $this->info("Database: $database");
422                $this->info("==========================================");
423            }
424
425            $result = $chroma->getIdentity();
426
427            $this->info("Identity information:");
428            $this->info("Response: " . json_encode($result, JSON_PRETTY_PRINT));
429        } catch (Exception $e) {
430            $this->error("Error checking ChromaDB identity: " . $e->getMessage());
431            return;
432        }
433    }
434
435    /**
436     * List all collections in the ChromaDB database
437     */
438    private function listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
439        // Create ChromaDB client
440        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);
441
442        try {
443            if ($verbose) {
444                $this->info("Listing ChromaDB collections...");
445                $this->info("Host: $host:$port");
446                $this->info("Tenant: $tenant");
447                $this->info("Database: $database");
448                $this->info("==========================================");
449            }
450
451            $result = $chroma->listCollections();
452
453            if (empty($result)) {
454                $this->info("No collections found.");
455                return;
456            }
457
458            $this->info("Collections:");
459            foreach ($result as $collection) {
460                $this->info("  - " . (isset($collection['name']) ? $collection['name'] : json_encode($collection)));
461            }
462        } catch (Exception $e) {
463            $this->error("Error listing ChromaDB collections: " . $e->getMessage());
464            return;
465        }
466    }
467
468    /**
469     * Get a document by its ID from ChromaDB
470     */
471    private function getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
472        // If no collection specified, derive it from the first part of the document ID
473        if (empty($collection)) {
474            $idParts = explode(':', $documentId);
475            $collection = isset($idParts[0]) && !empty($idParts[0]) ? $idParts[0] : 'documents';
476        }
477
478        // Create ChromaDB client
479        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);
480
481        try {
482            // Get the specified document by ID
483            $results = $chroma->getDocument($collection, $documentId);
484
485            if ($verbose) {
486                $this->info("Document retrieval results for: \"$documentId\"");
487                $this->info("Host: $host:$port");
488                $this->info("Tenant: $tenant");
489                $this->info("Database: $database");
490                $this->info("Collection: $collection");
491                $this->info("==========================================");
492            }
493
494            if (empty($results['ids'])) {
495                $this->info("No document found with ID: $documentId");
496                return;
497            }
498
499            for ($i = 0; $i < count($results['ids']); $i++) {
500                $this->info("Document " . ($i + 1) . ":");
501                $this->info("  ID: " . $results['ids'][$i]);
502
503                if (isset($results['documents'][$i])) {
504                    $this->info("  Content: " . $results['documents'][$i]);
505                }
506
507                if (isset($results['metadatas'][$i])) {
508                    $this->info("  Metadata: " . json_encode($results['metadatas'][$i], JSON_PRETTY_PRINT));
509                }
510                $this->info("");
511            }
512        } catch (Exception $e) {
513            $this->error("Error retrieving document from ChromaDB: " . $e->getMessage());
514            return;
515        }
516    }
517}
518