<?php

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\Utf8\PhpString;
use splitbrain\phpcli\Options;

if (!defined('DOKU_INC')) define('DOKU_INC', realpath(__DIR__ . '/../../../') . '/');

/**
 * DokuWiki CLI plugin for ChromaDB operations
 *
 * Provides the sub-commands send, query, heartbeat, identity, list and get.
 * All connection settings are read from the plugin configuration.
 */
class cli_plugin_dokullm extends CLIPlugin {

    /** Collection used when none can be derived from a document ID */
    const DEFAULT_COLLECTION = 'documents';

    /** Maximum number of characters of a document shown in query results */
    const PREVIEW_LENGTH = 255;

    /**
     * Register options and arguments
     *
     * @param Options $options
     */
    protected function setup(Options $options) {
        // Set help text
        $options->setHelp(
            "ChromaDB CLI plugin for DokuLLM\n\n" .
            "Usage: ./bin/plugin.php dokullm [action] [options]\n\n" .
            "Actions:\n" .
            " send Send a file or directory to ChromaDB\n" .
            " query Query ChromaDB\n" .
            " heartbeat Check if ChromaDB server is alive\n" .
            " identity Get authentication and identity information\n" .
            " list List all collections\n" .
            " get Get a document by its ID\n"
        );

        // Global options
        $options->registerOption('verbose', 'Enable verbose output', 'v');

        // Action-specific options
        $options->registerCommand('send', 'Send a file or directory to ChromaDB');
        $options->registerArgument('path', 'File or directory path', true, 'send');

        $options->registerCommand('query', 'Query ChromaDB');
        // main() reads these options, so they have to be registered to be usable.
        // registerOption() takes ($long, $help, $short, $needsarg, $command);
        // defaults are supplied by getOpt() in main().
        $options->registerOption('collection', 'Collection name to query', 'c', 'collection', 'query');
        $options->registerOption('limit', 'Number of results to return', 'l', 'limit', 'query');
        $options->registerArgument('search', 'Search terms', true, 'query');

        $options->registerCommand('heartbeat', 'Check if ChromaDB server is alive');

        $options->registerCommand('identity', 'Get authentication and identity information');

        $options->registerCommand('list', 'List all collections');

        $options->registerCommand('get', 'Get a document by its ID');
        $options->registerOption('collection', 'Collection name', 'c', 'collection', 'get');
        $options->registerArgument('id', 'Document ID', true, 'get');
    }

    /**
     * Main plugin logic
     *
     * Reads the connection settings and dispatches to the handler of the
     * selected sub-command. Prints the help and exits with status 1 when no
     * known sub-command was given.
     *
     * @param Options $options
     */
    protected function main(Options $options) {
        // Include the ChromaDBClient class
        require_once __DIR__ . '/ChromaDBClient.php';

        // Get values from DokuWiki settings
        $host = $this->getConf('chroma_host');
        $port = (int)$this->getConf('chroma_port');
        $tenant = $this->getConf('chroma_tenant');
        $database = $this->getConf('chroma_database');
        $ollamaHost = $this->getConf('ollama_host');
        $ollamaPort = (int)$this->getConf('ollama_port');
        $ollamaModel = $this->getConf('ollama_model');
        $verbose = $options->getOpt('verbose');

        $args = $options->getArgs();

        switch ($options->getCmd()) {
            case 'send':
                // compare against '' explicitly: a path named "0" is valid but falsy
                $path = (string)($args[0] ?? '');
                if ($path === '') {
                    $this->fatal('Missing file path for send action');
                }
                $this->sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'query':
                // join all arguments so unquoted multi-word searches are not cut to the first word
                $searchTerms = trim(implode(' ', $args));
                if ($searchTerms === '') {
                    $this->fatal('Missing search terms for query action');
                }
                $collection = $options->getOpt('collection', self::DEFAULT_COLLECTION);
                $limit = (int)$options->getOpt('limit', 5);
                if ($limit < 1) {
                    $this->fatal('Limit must be a positive integer');
                }
                $this->queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'heartbeat':
                $this->checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'identity':
                $this->checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'list':
                $this->listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'get':
                $documentId = (string)($args[0] ?? '');
                if ($documentId === '') {
                    $this->fatal('Missing document ID for get action');
                }
                $collection = $options->getOpt('collection', null);
                $this->getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            default:
                echo $options->help();
                exit(1);
        }
    }

    /**
     * Create a ChromaDB client for the given connection settings
     *
     * @param string $host ChromaDB host
     * @param int $port ChromaDB port
     * @param string $tenant ChromaDB tenant
     * @param string $database ChromaDB database
     * @param string $collection collection passed to the client constructor
     * @param string $ollamaHost Ollama host
     * @param int $ollamaPort Ollama port
     * @param string $ollamaModel Ollama model name
     * @return \dokuwiki\plugin\dokullm\ChromaDBClient
     */
    private function createClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel) {
        return new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel
        );
    }

    /**
     * Derive the collection name from a document ID
     *
     * Uses the part before the first colon; falls back to the default
     * collection when that part is empty.
     *
     * @param string $id document ID
     * @return string
     */
    private function collectionFromId($id) {
        $first = explode(':', (string)$id)[0];
        return !empty($first) ? $first : self::DEFAULT_COLLECTION;
    }

    /**
     * Print the connection settings followed by a separator line
     *
     * @param string $host
     * @param int $port
     * @param string $tenant
     * @param string $database
     * @param string|null $collection printed as an extra line when not null
     */
    private function printConnectionInfo($host, $port, $tenant, $database, $collection = null) {
        $this->info("Host: $host:$port");
        $this->info("Tenant: $tenant");
        $this->info("Database: $database");
        if ($collection !== null) {
            $this->info("Collection: $collection");
        }
        $this->info("==========================================");
    }

    /**
     * Shorten text to a maximum number of characters without splitting
     * multi-byte UTF-8 sequences
     *
     * The ellipsis is only appended when something was actually cut off.
     *
     * @param string $text
     * @param int $max maximum number of characters to keep
     * @return string
     */
    private function truncate($text, $max) {
        $text = (string)$text;
        if (PhpString::strlen($text) <= $max) {
            return $text;
        }
        return PhpString::substr($text, 0, $max) . "...";
    }

    /**
     * Print the outcome of a ChromaDBClient::processSingleFile() call
     *
     * @param array $result result as returned by the client; the keys 'status',
     *                      'message', 'details' and 'collection_status' are read
     * @param string $host
     * @param int $port
     * @param string $tenant
     * @param string $database
     * @param bool $verbose
     * @return string the status found in the result ('' if there was none)
     */
    private function reportFileResult($result, $host, $port, $tenant, $database, $verbose) {
        if ($verbose && !empty($result['collection_status'])) {
            $this->info($result['collection_status']);
        }

        $status = $result['status'] ?? '';
        switch ($status) {
            case 'success':
                $details = $result['details'] ?? [];
                $chunks = $details['chunks'] ?? 'unknown';
                if ($verbose) {
                    $this->info("Adding " . $chunks . " chunks to ChromaDB...");
                }
                $this->success("Successfully sent file to ChromaDB:");
                $this->info(" Document ID: " . ($details['document_id'] ?? 'unknown'));
                if ($verbose) {
                    $this->info(" Chunks: " . $chunks);
                    $this->info(" Host: $host:$port");
                    $this->info(" Tenant: $tenant");
                    $this->info(" Database: $database");
                    $this->info(" Collection: " . ($details['collection'] ?? 'unknown'));
                }
                break;

            case 'skipped':
                if ($verbose) {
                    $this->info($result['message'] ?? '');
                }
                break;

            case 'error':
                $this->error($result['message'] ?? '');
                break;
        }

        return $status;
    }

    /**
     * Make sure a collection exists, reporting failures instead of hiding them
     *
     * @param \dokuwiki\plugin\dokullm\ChromaDBClient $chroma
     * @param string $collectionName
     * @param bool $verbose
     * @return bool true when ensureCollectionExists() completed without an exception
     */
    private function ensureCollection($chroma, $collectionName, $verbose) {
        try {
            $collectionStatus = $chroma->ensureCollectionExists($collectionName);
            if ($verbose) {
                $this->info($collectionStatus);
            }
            return true;
        } catch (Exception $e) {
            $this->error("Error checking collection $collectionName: " . $e->getMessage());
            return false;
        }
    }

    /**
     * Send a file or directory of files to ChromaDB
     *
     * Directories are handed to processDirectory(). Single files whose name
     * starts with an underscore are skipped.
     */
    private function sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, self::DEFAULT_COLLECTION, $ollamaHost, $ollamaPort, $ollamaModel);

        if (is_dir($path)) {
            $this->processDirectory($path, $chroma, $host, $port, $tenant, $database, $verbose);
            return;
        }

        if (!file_exists($path)) {
            $this->error("File does not exist: $path");
            return;
        }

        // Skip files that start with underscore
        if (strpos(basename($path), '_') === 0) {
            if ($verbose) {
                $this->info("Skipping file (starts with underscore): $path");
            }
            return;
        }

        $this->processSingleFile($path, $chroma, $host, $port, $tenant, $database, false, $verbose);
    }

    /**
     * Process a single DokuWiki file and send it to ChromaDB
     *
     * The collection is derived from the first part of the document ID.
     * No ACL check is performed here.
     */
    private function processSingleFile($filePath, $chroma, $host, $port, $tenant, $database, $collectionChecked = false, $verbose = false) {
        try {
            // Parse file path to extract the document ID
            $id = \dokuwiki\plugin\dokullm\parseFilePath($filePath);
            $collectionName = $this->collectionFromId($id);

            $result = $chroma->processSingleFile($filePath, $collectionName, $collectionChecked);
            $this->reportFileResult($result, $host, $port, $tenant, $database, $verbose);
        } catch (Exception $e) {
            $this->error("Error sending file to ChromaDB: " . $e->getMessage());
        }
    }

    /**
     * Process all DokuWiki files in a directory and send them to ChromaDB
     *
     * Recurses into subdirectories and handles every .txt file whose name does
     * not start with an underscore. The collection is derived per file from its
     * document ID, the same way the single-file path and getDocument() do it;
     * each distinct collection is ensured once.
     */
    private function processDirectory($dirPath, $chroma, $host, $port, $tenant, $database, $verbose = false) {
        if ($verbose) {
            $this->info("Processing directory: $dirPath");
        }

        // Check if directory exists
        if (!is_dir($dirPath)) {
            $this->error("Directory does not exist: $dirPath");
            return;
        }

        $files = [];
        try {
            $iterator = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator($dirPath, RecursiveDirectoryIterator::SKIP_DOTS),
                RecursiveIteratorIterator::LEAVES_ONLY
            );

            foreach ($iterator as $file) {
                // Process only .txt files that don't start with underscore
                if ($file->isFile() && $file->getExtension() === 'txt' && strpos($file->getFilename(), '_') !== 0) {
                    $files[] = $file->getPathname();
                }
            }
        } catch (Exception $e) {
            // e.g. an unreadable subdirectory
            $this->error("Error reading directory $dirPath: " . $e->getMessage());
            return;
        }

        // Skip if no files
        if (empty($files)) {
            if ($verbose) {
                $this->info("No .txt files found in directory: $dirPath");
            }
            return;
        }

        // iteration order depends on the file system; sort for reproducible runs
        sort($files);

        if ($verbose) {
            $this->info("Found " . count($files) . " files to process.");
        }

        // collection name => whether ensuring it succeeded
        $checkedCollections = [];

        $processedCount = 0;
        $skippedCount = 0;
        $errorCount = 0;

        foreach ($files as $file) {
            if ($verbose) {
                $this->info("\nProcessing file: $file");
            }

            try {
                $id = \dokuwiki\plugin\dokullm\parseFilePath($file);
                $collectionName = $this->collectionFromId($id);

                if (!array_key_exists($collectionName, $checkedCollections)) {
                    $checkedCollections[$collectionName] = $this->ensureCollection($chroma, $collectionName, $verbose);
                }

                // when ensuring failed, pass false so the client may check again itself
                $result = $chroma->processSingleFile($file, $collectionName, $checkedCollections[$collectionName]);

                switch ($this->reportFileResult($result, $host, $port, $tenant, $database, $verbose)) {
                    case 'success':
                        $processedCount++;
                        break;
                    case 'skipped':
                        $skippedCount++;
                        break;
                    case 'error':
                        $errorCount++;
                        break;
                }
            } catch (Exception $e) {
                $errorCount++;
                $this->error("Error processing file $file: " . $e->getMessage());
            }
        }

        if ($verbose) {
            $this->info("\nFinished processing directory.");
            $this->info("Processing summary:");
            $this->info(" Processed: $processedCount files");
            $this->info(" Skipped: $skippedCount files");
            $this->info(" Errors: $errorCount files");
            return;
        }

        // Even in non-verbose mode, show the non-zero summary stats
        if ($processedCount > 0 || $skippedCount > 0 || $errorCount > 0) {
            $this->info("Processing summary:");
            if ($processedCount > 0) {
                $this->info(" Processed: $processedCount files");
            }
            if ($skippedCount > 0) {
                $this->info(" Skipped: $skippedCount files");
            }
            if ($errorCount > 0) {
                $this->info(" Errors: $errorCount files");
            }
        }
    }

    /**
     * Query ChromaDB for similar documents
     *
     * Prints ID, distance, a shortened document preview and, when present,
     * the metadata of each result.
     */
    private function queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            $results = $chroma->queryCollection($collection, [$searchTerms], $limit);

            $this->info("Query results for: \"$searchTerms\"");
            $this->printConnectionInfo($host, $port, $tenant, $database, $collection);

            // results are nested one level per query text; only one text is sent
            $ids = $results['ids'][0] ?? [];
            if (empty($ids)) {
                $this->info("No results found.");
                return;
            }

            foreach (array_values($ids) as $i => $id) {
                $this->info("Result " . ($i + 1) . ":");
                $this->info(" ID: " . $id);
                $this->info(" Distance: " . ($results['distances'][0][$i] ?? 'n/a'));
                $this->info(" Document: " . $this->truncate($results['documents'][0][$i] ?? '', self::PREVIEW_LENGTH));

                if (isset($results['metadatas'][0][$i])) {
                    $this->info(" Metadata: " . json_encode($results['metadatas'][0][$i]));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error querying ChromaDB: " . $e->getMessage());
        }
    }

    /**
     * Check if the ChromaDB server is alive
     */
    private function checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, self::DEFAULT_COLLECTION, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB server status...");
                $this->printConnectionInfo($host, $port, $tenant, $database);
            }

            $result = $chroma->heartbeat();

            $this->success("Server is alive!");
            $this->info("Response: " . json_encode($result));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB server status: " . $e->getMessage());
        }
    }

    /**
     * Get authentication and identity information from ChromaDB
     */
    private function checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, self::DEFAULT_COLLECTION, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB identity...");
                $this->printConnectionInfo($host, $port, $tenant, $database);
            }

            $result = $chroma->getIdentity();

            $this->info("Identity information:");
            $this->info("Response: " . json_encode($result, JSON_PRETTY_PRINT));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB identity: " . $e->getMessage());
        }
    }

    /**
     * List all collections in the ChromaDB database
     */
    private function listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, self::DEFAULT_COLLECTION, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            if ($verbose) {
                $this->info("Listing ChromaDB collections...");
                $this->printConnectionInfo($host, $port, $tenant, $database);
            }

            $result = $chroma->listCollections();

            if (empty($result)) {
                $this->info("No collections found.");
                return;
            }

            $this->info("Collections:");
            foreach ($result as $collection) {
                // fall back to the raw entry when it carries no name
                $this->info(" - " . (isset($collection['name']) ? $collection['name'] : json_encode($collection)));
            }
        } catch (Exception $e) {
            $this->error("Error listing ChromaDB collections: " . $e->getMessage());
        }
    }

    /**
     * Get a document by its ID from ChromaDB
     *
     * When no collection is given, it is derived from the first part of the
     * document ID.
     */
    private function getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        if (empty($collection)) {
            $collection = $this->collectionFromId($documentId);
        }

        // Create ChromaDB client
        $chroma = $this->createClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            $results = $chroma->getDocument($collection, $documentId);

            if ($verbose) {
                $this->info("Document retrieval results for: \"$documentId\"");
                $this->printConnectionInfo($host, $port, $tenant, $database, $collection);
            }

            if (empty($results['ids'])) {
                $this->info("No document found with ID: $documentId");
                return;
            }

            foreach (array_values($results['ids']) as $i => $id) {
                $this->info("Document " . ($i + 1) . ":");
                $this->info(" ID: " . $id);

                if (isset($results['documents'][$i])) {
                    $this->info(" Content: " . $results['documents'][$i]);
                }

                if (isset($results['metadatas'][$i])) {
                    $this->info(" Metadata: " . json_encode($results['metadatas'][$i], JSON_PRETTY_PRINT));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error retrieving document from ChromaDB: " . $e->getMessage());
        }
    }
}