<?php

use dokuwiki\Extension\CLIPlugin;
use splitbrain\phpcli\Options;

if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__) . '/../../../') . '/');

/**
 * DokuWiki CLI plugin for ChromaDB operations
 *
 * Provides the `send`, `query`, `heartbeat`, `identity`, `list` and `get`
 * commands. All server communication is delegated to
 * \dokuwiki\plugin\dokullm\ChromaDBClient, which is loaded from
 * ChromaDBClient.php next to this file.
 */
class cli_plugin_dokullm extends CLIPlugin {

    /** Collection used when none can be derived from a document ID */
    const DEFAULT_COLLECTION = 'documents';

    /** Maximum number of characters of a document shown in query results */
    const EXCERPT_LENGTH = 255;

    /**
     * Register options and arguments
     *
     * Options::registerOption() takes ($long, $help, $short, $needsarg, $command);
     * it has no parameter for a default value. Defaults are therefore applied
     * in main() through getOpt() and only mentioned in the help texts here.
     *
     * @param Options $options
     */
    protected function setup(Options $options) {
        // Set help text
        $options->setHelp(
            "ChromaDB CLI plugin for DokuLLM\n\n" .
            "Usage: ./bin/plugin.php dokullm [action] [options]\n\n" .
            "Actions:\n" .
            "  send       Send a file or directory to ChromaDB\n" .
            "  query      Query ChromaDB\n" .
            "  heartbeat  Check if ChromaDB server is alive\n" .
            "  identity   Get authentication and identity information\n" .
            "  list       List all collections\n" .
            "  get        Get a document by its ID\n"
        );

        // Global options
        // The short form is -H because -h is used by the CLI framework's built-in --help
        $options->registerOption('host', 'ChromaDB server host (default: localhost)', 'H', 'host');
        $options->registerOption('port', 'ChromaDB server port (default: 8000)', 'p', 'port');
        $options->registerOption('tenant', 'ChromaDB tenant (default: default_tenant)', null, 'tenant');
        $options->registerOption('database', 'ChromaDB database (default: default_database)', null, 'database');
        $options->registerOption('ollama-host', 'Ollama server host (default: localhost)', null, 'ollama-host');
        $options->registerOption('ollama-port', 'Ollama server port (default: 11434)', null, 'ollama-port');
        $options->registerOption(
            'ollama-model',
            'Ollama embeddings model (default: nomic-embed-text)',
            null,
            'ollama-model'
        );
        $options->registerOption('verbose', 'Enable verbose output', 'v');

        // Action-specific options (a command has to be registered before its options)
        $options->registerCommand('send', 'Send a file or directory to ChromaDB');
        $options->registerArgument('path', 'File or directory path', true, 'send');

        $options->registerCommand('query', 'Query ChromaDB');
        $options->registerOption(
            'collection',
            'Collection name to query (default: documents)',
            'c',
            'collection',
            'query'
        );
        $options->registerOption('limit', 'Number of results to return (default: 5)', 'l', 'limit', 'query');
        $options->registerArgument('search', 'Search terms', true, 'query');

        $options->registerCommand('heartbeat', 'Check if ChromaDB server is alive');

        $options->registerCommand('identity', 'Get authentication and identity information');

        $options->registerCommand('list', 'List all collections');

        $options->registerCommand('get', 'Get a document by its ID');
        $options->registerOption(
            'collection',
            'Collection name (default: first part of the document ID)',
            'c',
            'collection',
            'get'
        );
        $options->registerArgument('id', 'Document ID', true, 'get');
    }

    /**
     * Main plugin logic
     *
     * Reads the connection options, then dispatches to the handler of the
     * selected command. Without a (known) command the help screen is printed
     * and the process exits with status 1.
     *
     * @param Options $options
     */
    protected function main(Options $options) {
        // Include the ChromaDBClient class
        require_once __DIR__ . '/ChromaDBClient.php';

        $action = $options->getCmd();
        $verbose = $options->getOpt('verbose');

        // Get global options with defaults
        $host = $options->getOpt('host', 'localhost');
        $port = (int)$options->getOpt('port', 8000);
        $tenant = $options->getOpt('tenant', 'default_tenant');
        $database = $options->getOpt('database', 'default_database');
        $ollamaHost = $options->getOpt('ollama-host', 'localhost');
        $ollamaPort = (int)$options->getOpt('ollama-port', 11434);
        $ollamaModel = $options->getOpt('ollama-model', 'nomic-embed-text');

        // Every command takes at most one positional argument.
        // Compare explicitly instead of by truthiness so that "0" is accepted.
        $args = $options->getArgs();
        $firstArg = $args[0] ?? null;
        $hasFirstArg = ($firstArg !== null && $firstArg !== '');

        switch ($action) {
            case 'send':
                if (!$hasFirstArg) {
                    $this->fatal('Missing file path for send action');
                }
                $this->sendFile(
                    $firstArg, $host, $port, $tenant, $database,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'query':
                if (!$hasFirstArg) {
                    $this->fatal('Missing search terms for query action');
                }
                $collection = $options->getOpt('collection', self::DEFAULT_COLLECTION);
                $limit = (int)$options->getOpt('limit', 5);
                if ($limit < 1) {
                    $this->fatal('The limit must be a positive integer');
                }
                $this->queryChroma(
                    $firstArg, $limit, $host, $port, $tenant, $database, $collection,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'heartbeat':
                $this->checkHeartbeat(
                    $host, $port, $tenant, $database,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'identity':
                $this->checkIdentity(
                    $host, $port, $tenant, $database,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'list':
                $this->listCollections(
                    $host, $port, $tenant, $database,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'get':
                if (!$hasFirstArg) {
                    $this->fatal('Missing document ID for get action');
                }
                // null lets getDocument() derive the collection from the document ID
                $collection = $options->getOpt('collection', null);
                $this->getDocument(
                    $firstArg, $host, $port, $tenant, $database, $collection,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            default:
                echo $options->help();
                exit(1);
        }
    }

    /**
     * Derive the collection name from a document ID
     *
     * The part of the ID before the first colon is used; when it is empty the
     * default collection is returned.
     *
     * @param string $id Document ID, colon separated
     * @return string
     */
    private function collectionFromId($id) {
        $idParts = explode(':', (string)$id);
        return !empty($idParts[0]) ? $idParts[0] : self::DEFAULT_COLLECTION;
    }

    /**
     * Make sure a collection exists, reporting instead of hiding a failure
     *
     * @param object $chroma         ChromaDBClient instance
     * @param string $collectionName Collection to ensure
     * @param bool   $verbose        Print the status message returned by the client
     * @return bool true when the client call succeeded, false when it threw
     */
    private function ensureCollection($chroma, $collectionName, $verbose) {
        try {
            $collectionStatus = $chroma->ensureCollectionExists($collectionName);
            if ($verbose) {
                $this->info($collectionStatus);
            }
            return true;
        } catch (Exception $e) {
            $this->error("Could not ensure collection '$collectionName' exists: " . $e->getMessage());
            return false;
        }
    }

    /**
     * Print the outcome of a ChromaDBClient::processSingleFile() call
     *
     * @param array  $result   Result array; the keys read here are 'status',
     *                         'collection_status', 'message' and 'details'
     * @param string $host     ChromaDB host (verbose output only)
     * @param int    $port     ChromaDB port (verbose output only)
     * @param string $tenant   ChromaDB tenant (verbose output only)
     * @param string $database ChromaDB database (verbose output only)
     * @param bool   $verbose  Print the additional details
     * @return string The value of $result['status'] ('' when missing)
     */
    private function reportResult($result, $host, $port, $tenant, $database, $verbose) {
        if ($verbose && !empty($result['collection_status'])) {
            $this->info($result['collection_status']);
        }

        $status = isset($result['status']) ? $result['status'] : '';

        switch ($status) {
            case 'success':
                if ($verbose) {
                    $this->info("Adding " . $result['details']['chunks'] . " chunks to ChromaDB...");
                }
                $this->success("Successfully sent file to ChromaDB:");
                $this->info("  Document ID: " . $result['details']['document_id']);
                if ($verbose) {
                    $this->info("  Chunks: " . $result['details']['chunks']);
                    $this->info("  Host: $host:$port");
                    $this->info("  Tenant: $tenant");
                    $this->info("  Database: $database");
                    $this->info("  Collection: " . $result['details']['collection']);
                }
                break;

            case 'skipped':
                if ($verbose) {
                    $this->info($result['message']);
                }
                break;

            case 'error':
                $this->error($result['message']);
                break;
        }

        return $status;
    }

    /**
     * Shorten a text for display without cutting a multi-byte character
     *
     * The ellipsis is appended only when something was actually removed.
     *
     * @param mixed $text      Text to shorten (cast to string)
     * @param int   $maxLength Maximum number of characters to keep
     * @return string
     */
    private function excerpt($text, $maxLength = self::EXCERPT_LENGTH) {
        $text = (string)$text;

        // mbstring is optional in PHP; fall back to byte-wise handling without it
        if (function_exists('mb_strlen') && function_exists('mb_substr')) {
            if (mb_strlen($text, 'UTF-8') <= $maxLength) {
                return $text;
            }
            return mb_substr($text, 0, $maxLength, 'UTF-8') . '...';
        }

        if (strlen($text) <= $maxLength) {
            return $text;
        }
        return substr($text, 0, $maxLength) . '...';
    }

    /**
     * Send a file or directory of files to ChromaDB
     *
     * Directories are handed to processDirectory(). A single file is skipped
     * when its name starts with an underscore.
     *
     * @param string $path        File or directory path
     * @param string $host        ChromaDB host
     * @param int    $port        ChromaDB port
     * @param string $tenant      ChromaDB tenant
     * @param string $database    ChromaDB database
     * @param string $ollamaHost  Ollama host
     * @param int    $ollamaPort  Ollama port
     * @param string $ollamaModel Ollama embeddings model
     * @param bool   $verbose     Enable verbose output
     */
    private function sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $host, $port, $tenant, $database, self::DEFAULT_COLLECTION,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        if (is_dir($path)) {
            $this->processDirectory($path, $chroma, $host, $port, $tenant, $database, $verbose);
            return;
        }

        if (!file_exists($path)) {
            $this->error("File does not exist: $path");
            return;
        }

        // Skip files that start with underscore
        $filename = basename($path);
        if ($filename !== '' && $filename[0] === '_') {
            if ($verbose) {
                $this->info("Skipping file (starts with underscore): $path");
            }
            return;
        }

        $this->processSingleFile($path, $chroma, $host, $port, $tenant, $database, false, $verbose);
    }

    /**
     * Process a single DokuWiki file and send it to ChromaDB
     *
     * The document ID is obtained from the file path, the collection from the
     * first part of that ID. The file is only sent when the ACL check grants
     * at least read access to the cleaned ID.
     *
     * @param string $filePath          Path of the file to send
     * @param object $chroma            ChromaDBClient instance
     * @param string $host              ChromaDB host (verbose output only)
     * @param int    $port              ChromaDB port (verbose output only)
     * @param string $tenant            ChromaDB tenant (verbose output only)
     * @param string $database          ChromaDB database (verbose output only)
     * @param bool   $collectionChecked Passed through to the client
     * @param bool   $verbose           Enable verbose output
     */
    private function processSingleFile($filePath, $chroma, $host, $port, $tenant, $database, $collectionChecked = false, $verbose = false) {
        // Parse file path to extract the document ID
        $id = \dokuwiki\plugin\dokullm\parseFilePath($filePath);
        $collectionName = $this->collectionFromId($id);

        // Clean the ID and check ACL
        $cleanId = cleanID($id);
        if (auth_quickaclcheck($cleanId) < AUTH_READ) {
            $this->error("You are not allowed to read this file: $id");
            return;
        }

        try {
            $result = $chroma->processSingleFile($filePath, $collectionName, $collectionChecked);
            $this->reportResult($result, $host, $port, $tenant, $database, $verbose);
        } catch (Exception $e) {
            $this->error("Error sending file to ChromaDB: " . $e->getMessage());
        }
    }

    /**
     * Process all DokuWiki files in a directory and send them to ChromaDB
     *
     * The directory is walked recursively; only .txt files whose name does not
     * start with an underscore are sent. The collection is derived per file,
     * the same way the single-file path does it, and every collection is
     * ensured once before its first file is sent.
     *
     * NOTE(review): unlike processSingleFile() of this class, no ACL check is
     * done for the files found here - confirm whether that is intended.
     *
     * @param string $dirPath  Directory to process
     * @param object $chroma   ChromaDBClient instance
     * @param string $host     ChromaDB host (verbose output only)
     * @param int    $port     ChromaDB port (verbose output only)
     * @param string $tenant   ChromaDB tenant (verbose output only)
     * @param string $database ChromaDB database (verbose output only)
     * @param bool   $verbose  Enable verbose output
     */
    private function processDirectory($dirPath, $chroma, $host, $port, $tenant, $database, $verbose = false) {
        if ($verbose) {
            $this->info("Processing directory: $dirPath");
        }

        // Check if directory exists
        if (!is_dir($dirPath)) {
            $this->error("Directory does not exist: $dirPath");
            return;
        }

        // Create RecursiveIteratorIterator to process directories recursively
        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($dirPath, RecursiveDirectoryIterator::SKIP_DOTS),
            RecursiveIteratorIterator::LEAVES_ONLY
        );

        $files = [];
        foreach ($iterator as $file) {
            // Process only .txt files that don't start with underscore
            if ($file->isFile() && $file->getExtension() === 'txt' && $file->getFilename()[0] !== '_') {
                $files[] = $file->getPathname();
            }
        }

        // Skip if no files
        if (empty($files)) {
            if ($verbose) {
                $this->info("No .txt files found in directory: $dirPath");
            }
            return;
        }

        if ($verbose) {
            $this->info("Found " . count($files) . " files to process.");
        }

        // collection name => whether ensureCollectionExists() succeeded for it.
        // A failed attempt is remembered as false so the client is not told
        // that a collection was checked when it was not.
        $ensured = [];

        $processedCount = 0;
        $skippedCount = 0;
        $errorCount = 0;

        foreach ($files as $file) {
            if ($verbose) {
                $this->info("\nProcessing file: $file");
            }

            try {
                $id = \dokuwiki\plugin\dokullm\parseFilePath($file);
                $collectionName = $this->collectionFromId($id);

                if (!isset($ensured[$collectionName])) {
                    $ensured[$collectionName] = $this->ensureCollection($chroma, $collectionName, $verbose);
                }

                $result = $chroma->processSingleFile($file, $collectionName, $ensured[$collectionName]);
                $status = $this->reportResult($result, $host, $port, $tenant, $database, $verbose);

                if ($status === 'success') {
                    $processedCount++;
                } elseif ($status === 'skipped') {
                    $skippedCount++;
                } elseif ($status === 'error') {
                    $errorCount++;
                }
            } catch (Exception $e) {
                $errorCount++;
                $this->error("Error processing file $file: " . $e->getMessage());
            }
        }

        if ($verbose) {
            $this->info("\nFinished processing directory.");
            $this->info("Processing summary:");
            $this->info("  Processed: $processedCount files");
            $this->info("  Skipped: $skippedCount files");
            $this->info("  Errors: $errorCount files");
            return;
        }

        // Even in non-verbose mode, show the non-zero summary stats
        if ($processedCount > 0 || $skippedCount > 0 || $errorCount > 0) {
            $this->info("Processing summary:");
            if ($processedCount > 0) {
                $this->info("  Processed: $processedCount files");
            }
            if ($skippedCount > 0) {
                $this->info("  Skipped: $skippedCount files");
            }
            if ($errorCount > 0) {
                $this->info("  Errors: $errorCount files");
            }
        }
    }

    /**
     * Query ChromaDB for similar documents
     *
     * Prints ID, distance, an excerpt of the document and, when present, the
     * metadata of every hit.
     *
     * @param string $searchTerms Text to search for
     * @param int    $limit       Number of results to request
     * @param string $host        ChromaDB host
     * @param int    $port        ChromaDB port
     * @param string $tenant      ChromaDB tenant
     * @param string $database    ChromaDB database
     * @param string $collection  Collection to query
     * @param string $ollamaHost  Ollama host
     * @param int    $ollamaPort  Ollama port
     * @param string $ollamaModel Ollama embeddings model
     * @param bool   $verbose     Currently unused by this command
     */
    private function queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $host, $port, $tenant, $database, $collection,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            $results = $chroma->queryCollection($collection, [$searchTerms], $limit);

            $this->info("Query results for: \"$searchTerms\"");
            $this->info("Host: $host:$port");
            $this->info("Tenant: $tenant");
            $this->info("Database: $database");
            $this->info("Collection: $collection");
            $this->info("==========================================");

            if (empty($results['ids'][0])) {
                $this->info("No results found.");
                return;
            }

            // Only one query text is sent, so only the first result set is read
            $hitCount = count($results['ids'][0]);
            for ($i = 0; $i < $hitCount; $i++) {
                $distance = isset($results['distances'][0][$i]) ? $results['distances'][0][$i] : '';
                $document = isset($results['documents'][0][$i]) ? $results['documents'][0][$i] : '';

                $this->info("Result " . ($i + 1) . ":");
                $this->info("  ID: " . $results['ids'][0][$i]);
                $this->info("  Distance: " . $distance);
                $this->info("  Document: " . $this->excerpt($document));

                if (isset($results['metadatas'][0][$i])) {
                    $this->info("  Metadata: " . json_encode($results['metadatas'][0][$i]));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error querying ChromaDB: " . $e->getMessage());
        }
    }

    /**
     * Check if the ChromaDB server is alive
     *
     * @param string $host        ChromaDB host
     * @param int    $port        ChromaDB port
     * @param string $tenant      ChromaDB tenant
     * @param string $database    ChromaDB database
     * @param string $ollamaHost  Ollama host
     * @param int    $ollamaPort  Ollama port
     * @param string $ollamaModel Ollama embeddings model
     * @param bool   $verbose     Print the connection details first
     */
    private function checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $host, $port, $tenant, $database, self::DEFAULT_COLLECTION,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB server status...");
                $this->info("Host: $host:$port");
                $this->info("Tenant: $tenant");
                $this->info("Database: $database");
                $this->info("==========================================");
            }

            $result = $chroma->heartbeat();

            $this->success("Server is alive!");
            $this->info("Response: " . json_encode($result));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB server status: " . $e->getMessage());
        }
    }

    /**
     * Get authentication and identity information from ChromaDB
     *
     * @param string $host        ChromaDB host
     * @param int    $port        ChromaDB port
     * @param string $tenant      ChromaDB tenant
     * @param string $database    ChromaDB database
     * @param string $ollamaHost  Ollama host
     * @param int    $ollamaPort  Ollama port
     * @param string $ollamaModel Ollama embeddings model
     * @param bool   $verbose     Print the connection details first
     */
    private function checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $host, $port, $tenant, $database, self::DEFAULT_COLLECTION,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB identity...");
                $this->info("Host: $host:$port");
                $this->info("Tenant: $tenant");
                $this->info("Database: $database");
                $this->info("==========================================");
            }

            $result = $chroma->getIdentity();

            $this->info("Identity information:");
            $this->info("Response: " . json_encode($result, JSON_PRETTY_PRINT));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB identity: " . $e->getMessage());
        }
    }

    /**
     * List all collections in the ChromaDB database
     *
     * Prints the 'name' of every returned entry, or its JSON encoding when the
     * entry has no 'name' key.
     *
     * @param string $host        ChromaDB host
     * @param int    $port        ChromaDB port
     * @param string $tenant      ChromaDB tenant
     * @param string $database    ChromaDB database
     * @param string $ollamaHost  Ollama host
     * @param int    $ollamaPort  Ollama port
     * @param string $ollamaModel Ollama embeddings model
     * @param bool   $verbose     Print the connection details first
     */
    private function listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $host, $port, $tenant, $database, self::DEFAULT_COLLECTION,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            if ($verbose) {
                $this->info("Listing ChromaDB collections...");
                $this->info("Host: $host:$port");
                $this->info("Tenant: $tenant");
                $this->info("Database: $database");
                $this->info("==========================================");
            }

            $result = $chroma->listCollections();

            if (empty($result)) {
                $this->info("No collections found.");
                return;
            }

            $this->info("Collections:");
            foreach ($result as $collection) {
                $label = isset($collection['name']) ? $collection['name'] : json_encode($collection);
                $this->info("  - " . $label);
            }
        } catch (Exception $e) {
            $this->error("Error listing ChromaDB collections: " . $e->getMessage());
        }
    }

    /**
     * Get a document by its ID from ChromaDB
     *
     * When no collection is given it is derived from the first part of the
     * document ID.
     *
     * @param string      $documentId  ID of the document to fetch
     * @param string      $host        ChromaDB host
     * @param int         $port        ChromaDB port
     * @param string      $tenant      ChromaDB tenant
     * @param string      $database    ChromaDB database
     * @param string|null $collection  Collection name, empty to derive it from the ID
     * @param string      $ollamaHost  Ollama host
     * @param int         $ollamaPort  Ollama port
     * @param string      $ollamaModel Ollama embeddings model
     * @param bool        $verbose     Print the connection details before the result
     */
    private function getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // If no collection specified, derive it from the first part of the document ID
        if (empty($collection)) {
            $collection = $this->collectionFromId($documentId);
        }

        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $host, $port, $tenant, $database, $collection,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            $results = $chroma->getDocument($collection, $documentId);

            if ($verbose) {
                $this->info("Document retrieval results for: \"$documentId\"");
                $this->info("Host: $host:$port");
                $this->info("Tenant: $tenant");
                $this->info("Database: $database");
                $this->info("Collection: $collection");
                $this->info("==========================================");
            }

            if (empty($results['ids'])) {
                $this->info("No document found with ID: $documentId");
                return;
            }

            $documentCount = count($results['ids']);
            for ($i = 0; $i < $documentCount; $i++) {
                $this->info("Document " . ($i + 1) . ":");
                $this->info("  ID: " . $results['ids'][$i]);

                if (isset($results['documents'][$i])) {
                    $this->info("  Content: " . $results['documents'][$i]);
                }

                if (isset($results['metadatas'][$i])) {
                    $this->info("  Metadata: " . json_encode($results['metadatas'][$i], JSON_PRETTY_PRINT));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error retrieving document from ChromaDB: " . $e->getMessage());
        }
    }
}