<?php

use dokuwiki\Extension\CLIPlugin;
use splitbrain\phpcli\Options;

if (!defined('DOKU_INC')) define('DOKU_INC', realpath(__DIR__ . '/../../../') . '/');

/**
 * DokuWiki CLI plugin for ChromaDB operations
 *
 * Exposes the sub-commands `send`, `query`, `heartbeat`, `identity`, `list`
 * and `get`. Each one builds a ChromaDBClient from the connection options
 * given on the command line and prints the outcome through the CLI logger.
 */
class cli_plugin_dokullm_chromadb extends CLIPlugin {

    /** Separator line printed below connection banners */
    private const SEPARATOR = "==========================================";

    /** Collection used when none can be derived from a document ID */
    private const DEFAULT_COLLECTION = 'documents';

    /**
     * Register options and arguments
     *
     * Default values are NOT passed to registerOption(): its fifth parameter
     * is the command the option belongs to, not a default. Defaults are
     * applied in main() through Options::getOpt() and are only mentioned in
     * the help texts here.
     *
     * @param Options $options
     */
    protected function setup(Options $options) {
        // Set help text
        $options->setHelp(
            "ChromaDB CLI plugin for DokuWiki\n\n" .
            "Usage: ./bin/plugin.php dokullm_chromadb [action] [options]\n\n" .
            "Actions:\n" .
            " send Send a file or directory to ChromaDB\n" .
            " query Query ChromaDB\n" .
            " heartbeat Check if ChromaDB server is alive\n" .
            " identity Get authentication and identity information\n" .
            " list List all collections\n" .
            " get Get a document by its ID\n"
        );

        // Global options. No short form for --host: -h is taken by the
        // built-in --help option of the CLI framework.
        $options->registerOption('host', 'ChromaDB server host (default: localhost)', null, 'host');
        $options->registerOption('port', 'ChromaDB server port (default: 8000)', 'p', 'port');
        $options->registerOption('tenant', 'ChromaDB tenant (default: default_tenant)', null, 'tenant');
        $options->registerOption('database', 'ChromaDB database (default: default_database)', null, 'database');
        $options->registerOption('ollama-host', 'Ollama server host (default: localhost)', null, 'ollama-host');
        $options->registerOption('ollama-port', 'Ollama server port (default: 11434)', null, 'ollama-port');
        $options->registerOption(
            'ollama-model',
            'Ollama embeddings model (default: nomic-embed-text)',
            null,
            'ollama-model'
        );
        $options->registerOption('verbose', 'Enable verbose output', 'v');

        // Action-specific options
        $options->registerCommand('send', 'Send a file or directory to ChromaDB');
        $options->registerArgument('path', 'File or directory path', true, 'send');

        $options->registerCommand('query', 'Query ChromaDB');
        $options->registerOption(
            'collection',
            'Collection name to query (default: documents)',
            'c',
            'collection',
            'query'
        );
        $options->registerOption('limit', 'Number of results to return (default: 5)', 'l', 'limit', 'query');
        $options->registerArgument('search', 'Search terms', true, 'query');

        $options->registerCommand('heartbeat', 'Check if ChromaDB server is alive');

        $options->registerCommand('identity', 'Get authentication and identity information');

        $options->registerCommand('list', 'List all collections');

        $options->registerCommand('get', 'Get a document by its ID');
        $options->registerOption(
            'collection',
            'Collection name (default: first part of the document ID)',
            'c',
            'collection',
            'get'
        );
        $options->registerArgument('id', 'Document ID', true, 'get');
    }

    /**
     * Main plugin logic
     *
     * Reads the connection options (falling back to the documented defaults)
     * and dispatches to the handler of the selected command. Without a known
     * command the help screen is printed and the process exits with status 1.
     *
     * @param Options $options
     */
    protected function main(Options $options) {
        // Include the ChromaDBClient class
        require_once __DIR__ . '/ChromaDBClient.php';

        $action = $options->getCmd();
        $verbose = $options->getOpt('verbose');

        // Get global options with defaults
        $host = $options->getOpt('host', 'localhost');
        $port = (int)$options->getOpt('port', 8000);
        $tenant = $options->getOpt('tenant', 'default_tenant');
        $database = $options->getOpt('database', 'default_database');
        $ollamaHost = $options->getOpt('ollama-host', 'localhost');
        $ollamaPort = (int)$options->getOpt('ollama-port', 11434);
        $ollamaModel = $options->getOpt('ollama-model', 'nomic-embed-text');

        // First positional argument; its meaning depends on the command
        $firstArg = $options->getArgs()[0] ?? null;

        switch ($action) {
            case 'send':
                if (!$firstArg) {
                    $this->fatal('Missing file path for send action');
                }
                $this->sendFile(
                    $firstArg, $host, $port, $tenant, $database,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'query':
                if (!$firstArg) {
                    $this->fatal('Missing search terms for query action');
                }
                $collection = $options->getOpt('collection', self::DEFAULT_COLLECTION);
                $limit = (int)$options->getOpt('limit', 5);
                $this->queryChroma(
                    $firstArg, $limit, $host, $port, $tenant, $database, $collection,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'heartbeat':
                $this->checkHeartbeat(
                    $host, $port, $tenant, $database,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'identity':
                $this->checkIdentity(
                    $host, $port, $tenant, $database,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'list':
                $this->listCollections(
                    $host, $port, $tenant, $database,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            case 'get':
                if (!$firstArg) {
                    $this->fatal('Missing document ID for get action');
                }
                // null lets getDocument() derive the collection from the ID
                $collection = $options->getOpt('collection', null);
                $this->getDocument(
                    $firstArg, $host, $port, $tenant, $database, $collection,
                    $ollamaHost, $ollamaPort, $ollamaModel, $verbose
                );
                break;

            default:
                echo $options->help();
                exit(1);
        }
    }

    /**
     * Build a ChromaDB client for the given connection settings
     *
     * @return \dokuwiki\plugin\dokullm\ChromaDBClient
     */
    private function createClient(
        $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel
    ) {
        return new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel
        );
    }

    /**
     * Derive the collection name from a document ID
     *
     * Uses the part before the first ':'; falls back to 'documents' when that
     * part is empty in the sense of PHP's empty() (so '' and '0' fall back).
     *
     * @param string $id Document ID
     * @return string
     */
    private function collectionFromId($id) {
        $firstPart = explode(':', $id)[0];
        return !empty($firstPart) ? $firstPart : self::DEFAULT_COLLECTION;
    }

    /**
     * Print the connection banner (host, tenant, database, optional collection)
     * followed by the separator line
     *
     * @param string|null $collection Omitted from the banner when null
     */
    private function printConnection($host, $port, $tenant, $database, $collection = null) {
        $this->info("Host: $host:$port");
        $this->info("Tenant: $tenant");
        $this->info("Database: $database");
        if ($collection !== null) {
            $this->info("Collection: $collection");
        }
        $this->info(self::SEPARATOR);
    }

    /**
     * Print the outcome of ChromaDBClient::processSingleFile()
     *
     * Reads the keys 'status', 'message', 'details' and 'collection_status'
     * of the result array, as the calling code has always done.
     *
     * @param array $result Result array returned by the client
     * @return mixed The value of $result['status']
     */
    private function reportFileResult($result, $host, $port, $tenant, $database, $verbose) {
        if ($verbose && !empty($result['collection_status'])) {
            $this->info($result['collection_status']);
        }

        switch ($result['status']) {
            case 'success':
                if ($verbose) {
                    $this->info("Adding " . $result['details']['chunks'] . " chunks to ChromaDB...");
                }
                $this->success("Successfully sent file to ChromaDB:");
                $this->info(" Document ID: " . $result['details']['document_id']);
                if ($verbose) {
                    $this->info(" Chunks: " . $result['details']['chunks']);
                    $this->info(" Host: $host:$port");
                    $this->info(" Tenant: $tenant");
                    $this->info(" Database: $database");
                    $this->info(" Collection: " . $result['details']['collection']);
                }
                break;

            case 'skipped':
                if ($verbose) {
                    $this->info($result['message']);
                }
                break;

            case 'error':
                $this->error($result['message']);
                break;
        }

        return $result['status'];
    }

    /**
     * Send a file or directory of files to ChromaDB
     *
     * Directories are handed to processDirectory(). A single file is sent
     * unless it does not exist or its name starts with an underscore.
     */
    private function sendFile($path, $host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient(
            $host, $port, $tenant, $database, self::DEFAULT_COLLECTION,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        if (is_dir($path)) {
            $this->processDirectory($path, $chroma, $host, $port, $tenant, $database, $verbose);
            return;
        }

        if (!file_exists($path)) {
            $this->error("File does not exist: $path");
            return;
        }

        // Skip files that start with underscore
        if (basename($path)[0] === '_') {
            if ($verbose) {
                $this->info("Skipping file (starts with underscore): $path");
            }
            return;
        }

        $this->processSingleFile($path, $chroma, $host, $port, $tenant, $database, false, $verbose);
    }

    /**
     * Process a single DokuWiki file and send it to ChromaDB
     *
     * The collection is derived from the first part of the document ID. The
     * file is only sent when the ACL check grants at least read permission.
     */
    private function processSingleFile($filePath, $chroma, $host, $port, $tenant, $database, $collectionChecked = false, $verbose = false) {
        // Parse file path to extract the document ID
        $id = \dokuwiki\plugin\dokullm\parseFilePath($filePath);
        $collectionName = $this->collectionFromId($id);

        // Clean the ID and check ACL
        if (auth_quickaclcheck(cleanID($id)) < AUTH_READ) {
            $this->error("You are not allowed to read this file: $id");
            return;
        }

        try {
            $result = $chroma->processSingleFile($filePath, $collectionName, $collectionChecked);
            $this->reportFileResult($result, $host, $port, $tenant, $database, $verbose);
        } catch (Exception $e) {
            $this->error("Error sending file to ChromaDB: " . $e->getMessage());
        }
    }

    /**
     * Process all DokuWiki files in a directory and send them to ChromaDB
     *
     * Recursively collects every `.txt` file whose name does not start with
     * an underscore. All files go to one collection, derived from the first
     * file found.
     *
     * NOTE(review): unlike processSingleFile(), this path performs no
     * per-file ACL check and does not derive the collection per file.
     * Confirm whether that is intended.
     */
    private function processDirectory($dirPath, $chroma, $host, $port, $tenant, $database, $verbose = false) {
        if ($verbose) {
            $this->info("Processing directory: $dirPath");
        }

        // Check if directory exists
        if (!is_dir($dirPath)) {
            $this->error("Directory does not exist: $dirPath");
            return;
        }

        // Walk the tree recursively, visiting leaves (files) only
        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($dirPath, RecursiveDirectoryIterator::SKIP_DOTS),
            RecursiveIteratorIterator::LEAVES_ONLY
        );

        $files = [];
        foreach ($iterator as $entry) {
            // Process only .txt files that don't start with underscore
            if ($entry->isFile() && $entry->getExtension() === 'txt' && $entry->getFilename()[0] !== '_') {
                $files[] = $entry->getPathname();
            }
        }

        // Skip if no files
        if (empty($files)) {
            if ($verbose) {
                $this->info("No .txt files found in directory: $dirPath");
            }
            return;
        }

        if ($verbose) {
            $this->info("Found " . count($files) . " files to process.");
        }

        // Use the first part of the first file's document ID as collection name
        $collectionName = $this->collectionFromId(\dokuwiki\plugin\dokullm\parseFilePath($files[0]));

        // The collection is checked once up front. The per-file check stays
        // disabled even when this fails (as before), but the failure is no
        // longer hidden from the user.
        $collectionChecked = true;
        try {
            $collectionStatus = $chroma->ensureCollectionExists($collectionName);
            if ($verbose) {
                $this->info($collectionStatus);
            }
        } catch (Exception $e) {
            $this->error("Error checking collection $collectionName: " . $e->getMessage());
        }

        // Process each file, tallying outcomes by status
        $counts = ['success' => 0, 'skipped' => 0, 'error' => 0];

        foreach ($files as $file) {
            if ($verbose) {
                $this->info("\nProcessing file: $file");
            }

            try {
                $result = $chroma->processSingleFile($file, $collectionName, $collectionChecked);
                $status = $this->reportFileResult($result, $host, $port, $tenant, $database, $verbose);
                if (is_string($status) && isset($counts[$status])) {
                    $counts[$status]++;
                }
            } catch (Exception $e) {
                $counts['error']++;
                $this->error("Error processing file $file: " . $e->getMessage());
            }
        }

        $summary = [
            " Processed: {$counts['success']} files" => $counts['success'],
            " Skipped: {$counts['skipped']} files" => $counts['skipped'],
            " Errors: {$counts['error']} files" => $counts['error'],
        ];

        if ($verbose) {
            $this->info("\nFinished processing directory.");
            $this->info("Processing summary:");
            foreach (array_keys($summary) as $line) {
                $this->info($line);
            }
            return;
        }

        // Even in non-verbose mode, show the non-zero summary stats
        if (array_sum($counts) > 0) {
            $this->info("Processing summary:");
            foreach ($summary as $line => $count) {
                if ($count > 0) {
                    $this->info($line);
                }
            }
        }
    }

    /**
     * Query ChromaDB for similar documents
     *
     * Prints ID, distance, the first 255 bytes of the document and, when
     * present, the metadata of each hit.
     */
    private function queryChroma($searchTerms, $limit, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient(
            $host, $port, $tenant, $database, $collection,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            $results = $chroma->queryCollection($collection, [$searchTerms], $limit);

            $this->info("Query results for: \"$searchTerms\"");
            $this->printConnection($host, $port, $tenant, $database, $collection);

            // Results are nested per query text; only one text was sent
            if (empty($results['ids'][0])) {
                $this->info("No results found.");
                return;
            }

            $hitCount = count($results['ids'][0]);
            for ($i = 0; $i < $hitCount; $i++) {
                $this->info("Result " . ($i + 1) . ":");
                $this->info(" ID: " . $results['ids'][0][$i]);
                $this->info(" Distance: " . $results['distances'][0][$i]);
                $this->info(" Document: " . substr($results['documents'][0][$i], 0, 255) . "...");

                if (isset($results['metadatas'][0][$i])) {
                    $this->info(" Metadata: " . json_encode($results['metadatas'][0][$i]));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error querying ChromaDB: " . $e->getMessage());
        }
    }

    /**
     * Check if the ChromaDB server is alive
     */
    private function checkHeartbeat($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient(
            $host, $port, $tenant, $database, self::DEFAULT_COLLECTION,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB server status...");
                $this->printConnection($host, $port, $tenant, $database);
            }

            $result = $chroma->heartbeat();

            $this->success("Server is alive!");
            $this->info("Response: " . json_encode($result));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB server status: " . $e->getMessage());
        }
    }

    /**
     * Get authentication and identity information from ChromaDB
     */
    private function checkIdentity($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient(
            $host, $port, $tenant, $database, self::DEFAULT_COLLECTION,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            if ($verbose) {
                $this->info("Checking ChromaDB identity...");
                $this->printConnection($host, $port, $tenant, $database);
            }

            $result = $chroma->getIdentity();

            $this->info("Identity information:");
            $this->info("Response: " . json_encode($result, JSON_PRETTY_PRINT));
        } catch (Exception $e) {
            $this->error("Error checking ChromaDB identity: " . $e->getMessage());
        }
    }

    /**
     * List all collections in the ChromaDB database
     *
     * Prints the 'name' of each entry, or the JSON-encoded entry when it has
     * no 'name' key.
     */
    private function listCollections($host, $port, $tenant, $database, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // Create ChromaDB client
        $chroma = $this->createClient(
            $host, $port, $tenant, $database, self::DEFAULT_COLLECTION,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            if ($verbose) {
                $this->info("Listing ChromaDB collections...");
                $this->printConnection($host, $port, $tenant, $database);
            }

            $result = $chroma->listCollections();

            if (empty($result)) {
                $this->info("No collections found.");
                return;
            }

            $this->info("Collections:");
            foreach ($result as $collection) {
                $label = isset($collection['name']) ? $collection['name'] : json_encode($collection);
                $this->info(" - " . $label);
            }
        } catch (Exception $e) {
            $this->error("Error listing ChromaDB collections: " . $e->getMessage());
        }
    }

    /**
     * Get a document by its ID from ChromaDB
     *
     * When no collection is given it is derived from the first part of the
     * document ID. Every returned entry is printed with its ID and, when
     * present, its content and metadata.
     */
    private function getDocument($documentId, $host, $port, $tenant, $database, $collection, $ollamaHost, $ollamaPort, $ollamaModel, $verbose = false) {
        // If no collection specified, derive it from the first part of the document ID
        if (empty($collection)) {
            $collection = $this->collectionFromId($documentId);
        }

        // Create ChromaDB client
        $chroma = $this->createClient(
            $host, $port, $tenant, $database, $collection,
            $ollamaHost, $ollamaPort, $ollamaModel
        );

        try {
            $results = $chroma->getDocument($collection, $documentId);

            if ($verbose) {
                $this->info("Document retrieval results for: \"$documentId\"");
                $this->printConnection($host, $port, $tenant, $database, $collection);
            }

            if (empty($results['ids'])) {
                $this->info("No document found with ID: $documentId");
                return;
            }

            $entryCount = count($results['ids']);
            for ($i = 0; $i < $entryCount; $i++) {
                $this->info("Document " . ($i + 1) . ":");
                $this->info(" ID: " . $results['ids'][$i]);

                if (isset($results['documents'][$i])) {
                    $this->info(" Content: " . $results['documents'][$i]);
                }

                if (isset($results['metadatas'][$i])) {
                    $this->info(" Metadata: " . json_encode($results['metadatas'][$i], JSON_PRETTY_PRINT));
                }
                $this->info("");
            }
        } catch (Exception $e) {
            $this->error("Error retrieving document from ChromaDB: " . $e->getMessage());
        }
    }
}