<?php

use dokuwiki\Extension\CLIPlugin;
use splitbrain\phpcli\Options;

if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__) . '/../../../') . '/');

/**
 * DokuWiki CLI plugin for ChromaDB operations
 *
 * Provides the actions "send", "query", "heartbeat", "identity", "list" and
 * "get". Every action builds a ChromaDBClient from the connection options
 * given on the command line and prints its outcome through the CLI logger.
 */
class cli_plugin_dokullm extends CLIPlugin {

    /**
     * Register options and arguments
     *
     * Default values are not registered here: the fifth parameter of
     * Options::registerOption() is the command the option belongs to, not a
     * default. Defaults are applied in main() through getOpt($name, $default).
     *
     * @param Options $options
     */
    protected function setup(Options $options) {
        // Set help text
        $options->setHelp(
            "ChromaDB CLI plugin for DokuLLM\n\n" .
            "Usage: ./bin/plugin.php dokullm [action] [options]\n\n" .
            "Actions:\n" .
            " send Send a file or directory to ChromaDB\n" .
            " query Query ChromaDB\n" .
            " heartbeat Check if ChromaDB server is alive\n" .
            " identity Get authentication and identity information\n" .
            " list List all collections\n" .
            " get Get a document by its ID\n"
        );

        // Global options (no command given, so they apply to every action)
        // NOTE(review): php-cli registers -h for --help by default; confirm the
        // short option of --host does not clash with it.
        $options->registerOption('host', 'ChromaDB server host', 'h', 'host');
        $options->registerOption('port', 'ChromaDB server port', 'p', 'port');
        $options->registerOption('tenant', 'ChromaDB tenant', null, 'tenant');
        $options->registerOption('database', 'ChromaDB database', null, 'database');
        $options->registerOption('ollama-host', 'Ollama server host', null, 'ollama-host');
        $options->registerOption('ollama-port', 'Ollama server port', null, 'ollama-port');
        $options->registerOption('ollama-model', 'Ollama embeddings model', null, 'ollama-model');
        $options->registerOption('verbose', 'Enable verbose output', 'v');

        // Action-specific options: the last argument is the owning command
        $options->registerCommand('send', 'Send a file or directory to ChromaDB');
        $options->registerArgument('path', 'File or directory path', true, 'send');

        $options->registerCommand('query', 'Query ChromaDB');
        $options->registerOption('collection', 'Collection name to query', 'c', 'collection', 'query');
        $options->registerOption('limit', 'Number of results to return', 'l', 'limit', 'query');
        $options->registerArgument('search', 'Search terms', true, 'query');

        $options->registerCommand('heartbeat', 'Check if ChromaDB server is alive');

        $options->registerCommand('identity', 'Get authentication and identity information');

        $options->registerCommand('list', 'List all collections');

        $options->registerCommand('get', 'Get a document by its ID');
        $options->registerOption('collection', 'Collection name', 'c', 'collection', 'get');
        $options->registerArgument('id', 'Document ID', true, 'get');
    }

    /**
     * Main plugin logic
     *
     * Reads the connection options, then dispatches to the handler of the
     * selected action. Without a known action the help text is printed and
     * the script exits with status 1.
     *
     * @param Options $options
     */
    protected function main(Options $options) {
        // Include the ChromaDBClient class
        require_once dirname(__FILE__) . '/ChromaDBClient.php';

        $action = $options->getCmd();
        $verbose = $options->getOpt('verbose');

        // Get global options with defaults
        $host = $options->getOpt('host', 'localhost');
        $port = (int)$options->getOpt('port', 8000);
        $tenant = $options->getOpt('tenant', 'default_tenant');
        $database = $options->getOpt('database', 'default_database');
        $ollamaHost = $options->getOpt('ollama-host', 'localhost');
        $ollamaPort = (int)$options->getOpt('ollama-port', 11434);
        $ollamaModel = $options->getOpt('ollama-model', 'nomic-embed-text');

        // First positional argument; compared explicitly below so that a
        // literal "0" is not mistaken for a missing value
        $firstArg = $options->getArgs()[0] ?? null;
        $argMissing = ($firstArg === null || $firstArg === '');

        switch ($action) {
            case 'send':
                if ($argMissing) {
                    $this->fatal('Missing file path for send action');
                }
                $this->sendFile($firstArg, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'query':
                if ($argMissing) {
                    $this->fatal('Missing search terms for query action');
                }
                $collection = $options->getOpt('collection', 'documents');
                $limit = (int)$options->getOpt('limit', 5);
                $this->queryChroma($firstArg, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'heartbeat':
                $this->checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'identity':
                $this->checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'list':
                $this->listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            case 'get':
                if ($argMissing) {
                    $this->fatal('Missing document ID for get action');
                }
                // No default here: getDocument() derives the collection from the ID
                $collection = $options->getOpt('collection', null);
                $this->getDocument($firstArg, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose);
                break;

            default:
                echo $options->help();
                exit(1);
        }
    }

    /**
     * Derive the collection name from a document ID
     *
     * @param string $id Colon separated document ID
     * @return string First ID segment, or 'documents' when that segment is empty
     */
    private function collectionFromId($id) {
        $firstPart = explode(':', $id)[0];
        return empty($firstPart) ? 'documents' : $firstPart;
    }

    /**
     * Print the outcome of ChromaDBClient::processSingleFile()
     *
     * @param array $result Result array; the keys read here are 'status',
     *                      'message', 'collection_status' and 'details'
     *                      ('chunks', 'document_id', 'collection')
     * @param string $host ChromaDB host, shown in verbose output
     * @param int $port ChromaDB port, shown in verbose output
     * @param string $tenant ChromaDB tenant, shown in verbose output
     * @param string $database ChromaDB database, shown in verbose output
     * @param bool $verbose Whether to print the detailed output
     * @return string The value of $result['status']
     */
    private function reportResult($result, $host, $port, $tenant, $database, $verbose) {
        if ($verbose && !empty($result['collection_status'])) {
            $this->info($result['collection_status']);
        }

        switch ($result['status']) {
            case 'success':
                if ($verbose) {
                    $this->info("Adding " . $result['details']['chunks'] . " chunks to ChromaDB...");
                }
                $this->success("Successfully sent file to ChromaDB:");
                $this->info(" Document ID: " . $result['details']['document_id']);
                if ($verbose) {
                    $this->info(" Chunks: " . $result['details']['chunks']);
                    $this->info(" Host: $host:$port");
                    $this->info(" Tenant: $tenant");
                    $this->info(" Database: $database");
                    $this->info(" Collection: " . $result['details']['collection']);
                }
                break;

            case 'skipped':
                if ($verbose) {
                    $this->info($result['message']);
                }
                break;

            case 'error':
                $this->error($result['message']);
                break;
        }

        return $result['status'];
    }

    /**
     * Send a file or directory of files to ChromaDB
     *
     * Directories are handed to processDirectory(). A single file must exist
     * and is skipped when its name starts with an underscore.
     */
    private function sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);

        if (is_dir($path)) {
            // Process directory
            $this->processDirectory($path, $chroma, $host, $port, $tenant, $database, $verbose);
            return;
        }

        // Process single file
        if (!file_exists($path)) {
            $this->error("File does not exist: $path");
            return;
        }

        // Skip files that start with underscore
        $filename = basename($path);
        if ($filename[0] === '_') {
            if ($verbose) {
                $this->info("Skipping file (starts with underscore): $path");
            }
            return;
        }

        $this->processSingleFile($path, $chroma, $host, $port, $tenant, $database, false, $verbose);
    }

    /**
     * Process a single DokuWiki file and send it to ChromaDB
     *
     * The collection is taken from the first segment of the document ID. The
     * file is only sent when the ACL check grants at least read permission.
     */
    private function processSingleFile($filePath, $chroma, $host, $port, $tenant, $database, $collectionChecked = false, $verbose = false) {
        // Parse file path to extract metadata
        $id = \dokuwiki\plugin\dokullm\parseFilePath($filePath);

        // Use the first part of the document ID as collection name, fallback to 'documents'
        $collectionName = $this->collectionFromId($id);

        // Clean the ID and check ACL
        $cleanId = cleanID($id);
        if (auth_quickaclcheck($cleanId) < AUTH_READ) {
            $this->error("You are not allowed to read this file: $id");
            return;
        }

        try {
            // Process the file using the class method and report the outcome
            $result = $chroma->processSingleFile($filePath, $collectionName, $collectionChecked);
            $this->reportResult($result, $host, $port, $tenant, $database, $verbose);
        } catch (Exception $e) {
            $this->error("Error sending file to ChromaDB: " . $e->getMessage());
            return;
        }
    }

    /**
     * Process all DokuWiki files in a directory and send them to ChromaDB
     *
     * The directory is walked recursively. Only .txt files whose name does not
     * start with an underscore are sent. The collection name is derived from
     * the first file found and is used for every file of the run.
     */
    private function processDirectory($dirPath, $chroma, $host, $port, $tenant, $database, $verbose = false) {
        if ($verbose) {
            $this->info("Processing directory: $dirPath");
        }

        // Check if directory exists
        if (!is_dir($dirPath)) {
            $this->error("Directory does not exist: $dirPath");
            return;
        }

        // Create RecursiveIteratorIterator to process directories recursively
        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($dirPath, RecursiveDirectoryIterator::SKIP_DOTS),
            RecursiveIteratorIterator::LEAVES_ONLY
        );

        $files = [];
        foreach ($iterator as $file) {
            // Process only .txt files that don't start with underscore
            if ($file->isFile() && $file->getExtension() === 'txt' && $file->getFilename()[0] !== '_') {
                $files[] = $file->getPathname();
            }
        }

        // Skip if no files
        if (empty($files)) {
            if ($verbose) {
                $this->info("No .txt files found in directory: $dirPath");
            }
            return;
        }

        if ($verbose) {
            $this->info("Found " . count($files) . " files to process.");
        }

        // Use the first part of the document ID as collection name, fallback to 'documents'
        $collectionName = $this->collectionFromId(\dokuwiki\plugin\dokullm\parseFilePath($files[0]));

        try {
            $collectionStatus = $chroma->ensureCollectionExists($collectionName);
            if ($verbose) {
                $this->info($collectionStatus);
            }
            $collectionChecked = true;
        } catch (Exception $e) {
            // Report the failure instead of hiding it, and do not claim the
            // collection was verified
            $this->error("Error ensuring collection '$collectionName' exists: " . $e->getMessage());
            $collectionChecked = false;
        }

        // Process each file
        $processedCount = 0;
        $skippedCount = 0;
        $errorCount = 0;

        foreach ($files as $file) {
            if ($verbose) {
                $this->info("\nProcessing file: $file");
            }

            try {
                $result = $chroma->processSingleFile($file, $collectionName, $collectionChecked);
                $status = $this->reportResult($result, $host, $port, $tenant, $database, $verbose);

                if ($status === 'success') {
                    $processedCount++;
                } elseif ($status === 'skipped') {
                    $skippedCount++;
                } elseif ($status === 'error') {
                    $errorCount++;
                }
            } catch (Exception $e) {
                $errorCount++;
                $this->error("Error processing file $file: " . $e->getMessage());
            }
        }

        if ($verbose) {
            $this->info("\nFinished processing directory.");
            $this->info("Processing summary:");
            $this->info(" Processed: $processedCount files");
            $this->info(" Skipped: $skippedCount files");
            $this->info(" Errors: $errorCount files");
            return;
        }

        // Even in non-verbose mode, show summary stats if there were processed files
        if ($processedCount > 0 || $skippedCount > 0 || $errorCount > 0) {
            $this->info("Processing summary:");
            if ($processedCount > 0) {
                $this->info(" Processed: $processedCount files");
            }
            if ($skippedCount > 0) {
                $this->info(" Skipped: $skippedCount files");
            }
            if ($errorCount > 0) {
                $this->info(" Errors: $errorCount files");
            }
        }
    }

    /**
     * Query ChromaDB for similar documents
     *
     * Prints the connection details followed by ID, distance, the first 255
     * bytes of the document and, when present, the metadata of each result.
     */
    private function queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            // Query the specified collection
            $results = $chroma->queryCollection($collection, [$searchTerms], $limit);

            $this->info("Query results for: \"$searchTerms\"");
            $this->info("Host: $host:$port");
            $this->info("Tenant: $tenant");
            $this->info("Database: $database");
            $this->info("Collection: $collection");
            $this->info("==========================================");

            if (empty($results['ids'][0])) {
                $this->info("No results found.");
                return;
            }

            // Results of the single query text are found at index 0
            $total = count($results['ids'][0]);
            for ($i = 0; $i < $total; $i++) {
                $this->info("Result " . ($i + 1) . ":");
                $this->info(" ID: " . $results['ids'][0][$i]);
                $this->info(" Distance: " . $results['distances'][0][$i]);
                $this->info(" Document: " . substr($results['documents'][0][$i], 0, 255) . "...");

                if (isset($results['metadatas'][0][$i])) {
                    $this->info(" Metadata: " . json_encode($results['metadatas'][0][$i]));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error querying ChromaDB: " . $e->getMessage());
            return;
        }
    }

    /**
     * Check if the ChromaDB server is alive
     *
     * Prints the JSON encoded heartbeat response on success.
     */
    private function checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB server status...");
                $this->info("Host: $host:$port");
                $this->info("Tenant: $tenant");
                $this->info("Database: $database");
                $this->info("==========================================");
            }

            $result = $chroma->heartbeat();

            $this->success("Server is alive!");
            $this->info("Response: " . json_encode($result));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB server status: " . $e->getMessage());
            return;
        }
    }

    /**
     * Get authentication and identity information from ChromaDB
     *
     * Prints the pretty-printed JSON of the identity response.
     */
    private function checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB identity...");
                $this->info("Host: $host:$port");
                $this->info("Tenant: $tenant");
                $this->info("Database: $database");
                $this->info("==========================================");
            }

            $result = $chroma->getIdentity();

            $this->info("Identity information:");
            $this->info("Response: " . json_encode($result, JSON_PRETTY_PRINT));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB identity: " . $e->getMessage());
            return;
        }
    }

    /**
     * List all collections in the ChromaDB database
     *
     * Prints the 'name' entry of each collection, or the JSON encoded
     * collection when it has no 'name'.
     */
    private function listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, 'documents', $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            if ($verbose) {
                $this->info("Listing ChromaDB collections...");
                $this->info("Host: $host:$port");
                $this->info("Tenant: $tenant");
                $this->info("Database: $database");
                $this->info("==========================================");
            }

            $result = $chroma->listCollections();

            if (empty($result)) {
                $this->info("No collections found.");
                return;
            }

            $this->info("Collections:");
            foreach ($result as $collection) {
                $this->info(" - " . (isset($collection['name']) ? $collection['name'] : json_encode($collection)));
            }
        } catch (Exception $e) {
            $this->error("Error listing ChromaDB collections: " . $e->getMessage());
            return;
        }
    }

    /**
     * Get a document by its ID from ChromaDB
     *
     * When no collection is given it is derived from the first segment of the
     * document ID. Prints ID, content and metadata of every returned entry.
     */
    private function getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // If no collection specified, derive it from the first part of the document ID
        if (empty($collection)) {
            $collection = $this->collectionFromId($documentId);
        }

        // Create ChromaDB client
        $chroma = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel);

        try {
            // Get the specified document by ID
            $results = $chroma->getDocument($collection, $documentId);

            if ($verbose) {
                $this->info("Document retrieval results for: \"$documentId\"");
                $this->info("Host: $host:$port");
                $this->info("Tenant: $tenant");
                $this->info("Database: $database");
                $this->info("Collection: $collection");
                $this->info("==========================================");
            }

            if (empty($results['ids'])) {
                $this->info("No document found with ID: $documentId");
                return;
            }

            $total = count($results['ids']);
            for ($i = 0; $i < $total; $i++) {
                $this->info("Document " . ($i + 1) . ":");
                $this->info(" ID: " . $results['ids'][$i]);

                if (isset($results['documents'][$i])) {
                    $this->info(" Content: " . $results['documents'][$i]);
                }

                if (isset($results['metadatas'][$i])) {
                    $this->info(" Metadata: " . json_encode($results['metadatas'][$i], JSON_PRETTY_PRINT));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error retrieving document from ChromaDB: " . $e->getMessage());
            return;
        }
    }
}