xref: /plugin/dokullm/cli.php (revision a15292f0b247578d5a39202498206e2373e7d9e3)
1<?php
2
3use dokuwiki\Extension\CLIPlugin;
4use splitbrain\phpcli\Options;
5
// Fall back to the directory three levels above this plugin file as the
// DokuWiki root when no entry point has defined DOKU_INC yet.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../../../') . '/');
}
7
8/**
9 * DokuWiki CLI plugin for ChromaDB operations
10 */
11class cli_plugin_dokullm extends CLIPlugin {
12
13    /**
14     * Register options and arguments
15     *
16     * @param Options $options
17     */
18    protected function setup(Options $options) {
19        // Set help text
20        $options->setHelp(
21            "ChromaDB CLI plugin for DokuLLM\n\n" .
22            "Usage: ./bin/plugin.php dokullm [action] [options]\n\n" .
23            "Actions:\n" .
24            "  send       Send a file or directory to ChromaDB\n" .
25            "  query      Query ChromaDB\n" .
26            "  heartbeat  Check if ChromaDB server is alive\n" .
27            "  identity   Get authentication and identity information\n" .
28            "  list       List all collections\n" .
29            "  get        Get a document by its ID\n"
30        );
31
32        // Global options
33        $options->registerOption('host', 'ChromaDB server host', 'h', 'host', 'localhost');
34        $options->registerOption('port', 'ChromaDB server port', 'p', 'port', '8000');
35        $options->registerOption('tenant', 'ChromaDB tenant', null, 'tenant', 'default_tenant');
36        $options->registerOption('database', 'ChromaDB database', null, 'database', 'default_database');
37        $options->registerOption('ollama-host', 'Ollama server host', null, 'ollama-host', 'localhost');
38        $options->registerOption('ollama-port', 'Ollama server port', null, 'ollama-port', '11434');
39        $options->registerOption('ollama-model', 'Ollama embeddings model', null, 'ollama-model', 'nomic-embed-text');
40        $options->registerOption('verbose', 'Enable verbose output', 'v');
41
42        // Action-specific options
43        $options->registerCommand('send', 'Send a file or directory to ChromaDB');
44        $options->registerArgument('path', 'File or directory path', true, 'send');
45
46        $options->registerCommand('query', 'Query ChromaDB');
47        $options->registerOption('collection', 'Collection name to query', 'c', 'collection', 'documents', 'query');
48        $options->registerOption('limit', 'Number of results to return', 'l', 'limit', '5', 'query');
49        $options->registerArgument('search', 'Search terms', true, 'query');
50
51        $options->registerCommand('heartbeat', 'Check if ChromaDB server is alive');
52
53        $options->registerCommand('identity', 'Get authentication and identity information');
54
55        $options->registerCommand('list', 'List all collections');
56
57        $options->registerCommand('get', 'Get a document by its ID');
58        $options->registerOption('collection', 'Collection name', 'c', 'collection', 'documents', 'get');
59        $options->registerArgument('id', 'Document ID', true, 'get');
60    }
61
62    /**
63     * Main plugin logic
64     *
65     * @param Options $options
66     */
67    protected function main(Options $options) {
68        // Include the ChromaDBClient class
69        require_once dirname(__FILE__) . '/ChromaDBClient.php';
70
71        // Get global options with defaults
72        $host = $options->getOpt('host', 'localhost');
73        $port = (int)$options->getOpt('port', 8000);
74        $tenant = $options->getOpt('tenant', 'default_tenant');
75        $database = $options->getOpt('database', 'default_database');
76        $ollamaHost = $options->getOpt('ollama-host', 'localhost');
77        $ollamaPort = (int)$options->getOpt('ollama-port', 11434);
78        $ollamaModel = $options->getOpt('ollama-model', 'nomic-embed-text');
79        $verbose = $options->getOpt('verbose');
80
81        $action = $options->getCmd();
82
83        switch ($action) {
84            case 'send':
85                $path = $options->getArgs()[0] ?? null;
86                if (!$path) {
87                    $this->fatal('Missing file path for send action');
88                }
89                $this->sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
90                break;
91
92            case 'query':
93                $searchTerms = $options->getArgs()[0] ?? null;
94                if (!$searchTerms) {
95                    $this->fatal('Missing search terms for query action');
96                }
97                $collection = $options->getOpt('collection', 'documents');
98                $limit = (int)$options->getOpt('limit', 5);
99                $this->queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
100                break;
101
102            case 'heartbeat':
103                $this->checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
104                break;
105
106            case 'identity':
107                $this->checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
108                break;
109
110            case 'list':
111                $this->listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
112                break;
113
114            case 'get':
115                $documentId = $options->getArgs()[0] ?? null;
116                if (!$documentId) {
117                    $this->fatal('Missing document ID for get action');
118                }
119                $collection = $options->getOpt('collection', null);
120                $this->getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
121                break;
122
123            default:
124                echo $options->help();
125                exit(1);
126        }
127    }
128
129    /**
130     * Send a file or directory of files to ChromaDB
131     */
132    private function sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
133        // Create ChromaDB client
134        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);
135
136        if (is_dir($path)) {
137            // Process directory
138            $this->processDirectory($path, $chroma, $host, $port, $tenant, $database, $verbose);
139        } else {
140            // Process single file
141            if (!file_exists($path)) {
142                $this->error("File does not exist: $path");
143                return;
144            }
145
146            // Skip files that start with underscore
147            $filename = basename($path);
148            if ($filename[0] === '_') {
149                if ($verbose) {
150                    $this->info("Skipping file (starts with underscore): $path");
151                }
152                return;
153            }
154
155            $this->processSingleFile($path, $chroma, $host, $port, $tenant, $database, false, $verbose);
156        }
157    }
158
159    /**
160     * Process a single DokuWiki file and send it to ChromaDB
161     */
162    private function processSingleFile($filePath, $chroma, $host, $port, $tenant, $database, $collectionChecked = false, $verbose = false) {
163        // Parse file path to extract metadata
164        $id = \dokuwiki\plugin\dokullm\parseFilePath($filePath);
165
166        // Use the first part of the document ID as collection name, fallback to 'documents'
167        $idParts = explode(':', $id);
168        $collectionName = isset($idParts[0]) && !empty($idParts[0]) ? $idParts[0] : 'documents';
169
170        // Clean the ID and check ACL
171        $cleanId = cleanID($id);
172        if (auth_quickaclcheck($cleanId) < AUTH_READ) {
173            $this->error("You are not allowed to read this file: $id");
174            return;
175        }
176
177        try {
178            // Process the file using the class method
179            $result = $chroma->processSingleFile($filePath, $collectionName, $collectionChecked);
180
181            // Handle the result with verbose output
182            if ($verbose && !empty($result['collection_status'])) {
183                $this->info($result['collection_status']);
184            }
185
186            switch ($result['status']) {
187                case 'success':
188                    if ($verbose) {
189                        $this->info("Adding " . $result['details']['chunks'] . " chunks to ChromaDB...");
190                    }
191                    $this->success("Successfully sent file to ChromaDB:");
192                    $this->info("  Document ID: " . $result['details']['document_id']);
193                    if ($verbose) {
194                        $this->info("  Chunks: " . $result['details']['chunks']);
195                        $this->info("  Host: $host:$port");
196                        $this->info("  Tenant: $tenant");
197                        $this->info("  Database: $database");
198                        $this->info("  Collection: " . $result['details']['collection']);
199                    }
200                    break;
201
202                case 'skipped':
203                    if ($verbose) {
204                        $this->info($result['message']);
205                    }
206                    break;
207
208                case 'error':
209                    $this->error($result['message']);
210                    break;
211            }
212        } catch (Exception $e) {
213            $this->error("Error sending file to ChromaDB: " . $e->getMessage());
214            return;
215        }
216    }
217
218    /**
219     * Process all DokuWiki files in a directory and send them to ChromaDB
220     */
221    private function processDirectory($dirPath, $chroma, $host, $port, $tenant, $database, $verbose = false) {
222        if ($verbose) {
223            $this->info("Processing directory: $dirPath");
224        }
225
226        // Check if directory exists
227        if (!is_dir($dirPath)) {
228            $this->error("Directory does not exist: $dirPath");
229            return;
230        }
231
232        // Create RecursiveIteratorIterator to process directories recursively
233        $iterator = new RecursiveIteratorIterator(
234            new RecursiveDirectoryIterator($dirPath, RecursiveDirectoryIterator::SKIP_DOTS),
235            RecursiveIteratorIterator::LEAVES_ONLY
236        );
237
238        $files = [];
239        foreach ($iterator as $file) {
240            // Process only .txt files that don't start with underscore
241            if ($file->isFile() && $file->getExtension() === 'txt' && $file->getFilename()[0] !== '_') {
242                $files[] = $file->getPathname();
243            }
244        }
245
246        // Skip if no files
247        if (empty($files)) {
248            if ($verbose) {
249                $this->info("No .txt files found in directory: $dirPath");
250            }
251            return;
252        }
253
254        if ($verbose) {
255            $this->info("Found " . count($files) . " files to process.");
256        }
257
258        // Use the first part of the document ID as collection name, fallback to 'documents'
259        $sampleFile = $files[0];
260        $id = \dokuwiki\plugin\dokullm\parseFilePath($sampleFile);
261        $idParts = explode(':', $id);
262        $collectionName = isset($idParts[0]) && !empty($idParts[0]) ? $idParts[0] : 'documents';
263
264        try {
265            $collectionStatus = $chroma->ensureCollectionExists($collectionName);
266            if ($verbose) {
267                $this->info($collectionStatus);
268            }
269            $collectionChecked = true;
270        } catch (Exception $e) {
271            $collectionChecked = true;
272        }
273
274        // Process each file
275        $processedCount = 0;
276        $skippedCount = 0;
277        $errorCount = 0;
278
279        foreach ($files as $file) {
280            if ($verbose) {
281                $this->info("\nProcessing file: $file");
282            }
283
284            try {
285                $result = $chroma->processSingleFile($file, $collectionName, $collectionChecked);
286
287                // Handle the result with verbose output
288                if ($verbose && !empty($result['collection_status'])) {
289                    $this->info($result['collection_status']);
290                }
291
292                switch ($result['status']) {
293                    case 'success':
294                        $processedCount++;
295                        if ($verbose) {
296                            $this->info("Adding " . $result['details']['chunks'] . " chunks to ChromaDB...");
297                        }
298                        $this->success("Successfully sent file to ChromaDB:");
299                        $this->info("  Document ID: " . $result['details']['document_id']);
300                        if ($verbose) {
301                            $this->info("  Chunks: " . $result['details']['chunks']);
302                            $this->info("  Host: $host:$port");
303                            $this->info("  Tenant: $tenant");
304                            $this->info("  Database: $database");
305                            $this->info("  Collection: " . $result['details']['collection']);
306                        }
307                        break;
308
309                    case 'skipped':
310                        $skippedCount++;
311                        if ($verbose) {
312                            $this->info($result['message']);
313                        }
314                        break;
315
316                    case 'error':
317                        $errorCount++;
318                        $this->error($result['message']);
319                        break;
320                }
321            } catch (Exception $e) {
322                $errorCount++;
323                $this->error("Error processing file $file: " . $e->getMessage());
324            }
325        }
326
327        if ($verbose) {
328            $this->info("\nFinished processing directory.");
329            $this->info("Processing summary:");
330            $this->info("  Processed: $processedCount files");
331            $this->info("  Skipped: $skippedCount files");
332            $this->info("  Errors: $errorCount files");
333        } else {
334            // Even in non-verbose mode, show summary stats if there were processed files
335            if ($processedCount > 0 || $skippedCount > 0 || $errorCount > 0) {
336                $this->info("Processing summary:");
337                if ($processedCount > 0) {
338                    $this->info("  Processed: $processedCount files");
339                }
340                if ($skippedCount > 0) {
341                    $this->info("  Skipped: $skippedCount files");
342                }
343                if ($errorCount > 0) {
344                    $this->info("  Errors: $errorCount files");
345                }
346            }
347        }
348    }
349
350    /**
351     * Query ChromaDB for similar documents
352     */
353    private function queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
354        // Create ChromaDB client
355        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);
356
357        try {
358            // Query the specified collection by collection
359            $results = $chroma->queryCollection($collection, [$searchTerms], $limit);
360
361            $this->info("Query results for: \"$searchTerms\"");
362            $this->info("Host: $host:$port");
363            $this->info("Tenant: $tenant");
364            $this->info("Database: $database");
365            $this->info("Collection: $collection");
366            $this->info("==========================================");
367
368            if (empty($results['ids'][0])) {
369                $this->info("No results found.");
370                return;
371            }
372
373            for ($i = 0; $i < count($results['ids'][0]); $i++) {
374                $this->info("Result " . ($i + 1) . ":");
375                $this->info("  ID: " . $results['ids'][0][$i]);
376                $this->info("  Distance: " . $results['distances'][0][$i]);
377                $this->info("  Document: " . substr($results['documents'][0][$i], 0, 255) . "...");
378
379                if (isset($results['metadatas'][0][$i])) {
380                    $this->info("  Metadata: " . json_encode($results['metadatas'][0][$i]));
381                }
382                $this->info("");
383            }
384        } catch (Exception $e) {
385            $this->error("Error querying ChromaDB: " . $e->getMessage());
386            return;
387        }
388    }
389
390    /**
391     * Check if the ChromaDB server is alive
392     */
393    private function checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
394        // Create ChromaDB client
395        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);
396
397        try {
398            if ($verbose) {
399                $this->info("Checking ChromaDB server status...");
400                $this->info("Host: $host:$port");
401                $this->info("Tenant: $tenant");
402                $this->info("Database: $database");
403                $this->info("==========================================");
404            }
405
406            $result = $chroma->heartbeat();
407
408            $this->success("Server is alive!");
409            $this->info("Response: " . json_encode($result));
410        } catch (Exception $e) {
411            $this->error("Error checking ChromaDB server status: " . $e->getMessage());
412            return;
413        }
414    }
415
416    /**
417     * Get authentication and identity information from ChromaDB
418     */
419    private function checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
420        // Create ChromaDB client
421        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);
422
423        try {
424            if ($verbose) {
425                $this->info("Checking ChromaDB identity...");
426                $this->info("Host: $host:$port");
427                $this->info("Tenant: $tenant");
428                $this->info("Database: $database");
429                $this->info("==========================================");
430            }
431
432            $result = $chroma->getIdentity();
433
434            $this->info("Identity information:");
435            $this->info("Response: " . json_encode($result, JSON_PRETTY_PRINT));
436        } catch (Exception $e) {
437            $this->error("Error checking ChromaDB identity: " . $e->getMessage());
438            return;
439        }
440    }
441
442    /**
443     * List all collections in the ChromaDB database
444     */
445    private function listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
446        // Create ChromaDB client
447        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);
448
449        try {
450            if ($verbose) {
451                $this->info("Listing ChromaDB collections...");
452                $this->info("Host: $host:$port");
453                $this->info("Tenant: $tenant");
454                $this->info("Database: $database");
455                $this->info("==========================================");
456            }
457
458            $result = $chroma->listCollections();
459
460            if (empty($result)) {
461                $this->info("No collections found.");
462                return;
463            }
464
465            $this->info("Collections:");
466            foreach ($result as $collection) {
467                $this->info("  - " . (isset($collection['name']) ? $collection['name'] : json_encode($collection)));
468            }
469        } catch (Exception $e) {
470            $this->error("Error listing ChromaDB collections: " . $e->getMessage());
471            return;
472        }
473    }
474
475    /**
476     * Get a document by its ID from ChromaDB
477     */
478    private function getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
479        // If no collection specified, derive it from the first part of the document ID
480        if (empty($collection)) {
481            $idParts = explode(':', $documentId);
482            $collection = isset($idParts[0]) && !empty($idParts[0]) ? $idParts[0] : 'documents';
483        }
484
485        // Create ChromaDB client
486        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);
487
488        try {
489            // Get the specified document by ID
490            $results = $chroma->getDocument($collection, $documentId);
491
492            if ($verbose) {
493                $this->info("Document retrieval results for: \"$documentId\"");
494                $this->info("Host: $host:$port");
495                $this->info("Tenant: $tenant");
496                $this->info("Database: $database");
497                $this->info("Collection: $collection");
498                $this->info("==========================================");
499            }
500
501            if (empty($results['ids'])) {
502                $this->info("No document found with ID: $documentId");
503                return;
504            }
505
506            for ($i = 0; $i < count($results['ids']); $i++) {
507                $this->info("Document " . ($i + 1) . ":");
508                $this->info("  ID: " . $results['ids'][$i]);
509
510                if (isset($results['documents'][$i])) {
511                    $this->info("  Content: " . $results['documents'][$i]);
512                }
513
514                if (isset($results['metadatas'][$i])) {
515                    $this->info("  Metadata: " . json_encode($results['metadatas'][$i], JSON_PRETTY_PRINT));
516                }
517                $this->info("");
518            }
519        } catch (Exception $e) {
520            $this->error("Error retrieving document from ChromaDB: " . $e->getMessage());
521            return;
522        }
523    }
524}
525