<?php

namespace dokuwiki\plugin\dokullm;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}

/**
 * LLM Client class for handling API communications
 *
 * Holds the connection/sampling settings passed to the constructor and
 * provides methods for various text processing operations through an LLM API.
 * Implements caching for tool calls to avoid duplicate processing.
 */
class LlmClient
{
    /** @var string The API endpoint URL */
    private $api_url;

    /** @var array Cache for tool call results, keyed by md5(tool name + arguments) */
    private $toolCallCache = [];

    /** @var string Current text for tool usage */
    private $currentText = '';

    /** @var array Track tool call counts (tool name => count) to prevent infinite loops */
    private $toolCallCounts = [];

    /** @var string The API authentication key */
    private $api_key;

    /** @var string The model identifier to use */
    private $model;

    /** @var int The request timeout in seconds */
    private $timeout;

    /** @var float The temperature setting for response randomness */
    private $temperature;

    /** @var float The top-p setting for nucleus sampling */
    private $top_p;

    /** @var int The top-k setting for token selection */
    private $top_k;

    /** @var float The min-p setting for minimum probability threshold */
    private $min_p;

    /** @var bool Whether to enable thinking in the LLM responses */
    private $think;

    /** @var string|null Language code used to select prompt template pages */
    private $language;

    /**
     * Initialize the LLM client with configuration settings
     *
     * The values are supplied by the caller; this constructor only stores them.
     * Sampling parameters left at null are omitted from the API request.
     *
     * @param string|null $api_url     The LLM API endpoint URL
     * @param string|null $api_key     Authentication key for the API (optional)
     * @param string|null $model       The model identifier to use for requests
     * @param int|null    $timeout     Request timeout in seconds
     * @param float|null  $temperature Temperature setting for response randomness
     * @param float|null  $top_p       Top-p (nucleus sampling) setting
     * @param int|null    $top_k       Top-k setting
     * @param float|null  $min_p       Minimum probability threshold
     * @param bool|null   $think       Whether to enable thinking in LLM responses
     * @param string|null $language    Language code for prompt templates ('default' or empty means 'en')
     */
    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $language = null)
    {
        $this->api_url = $api_url;
        $this->api_key = $api_key;
        $this->model = $model;
        $this->timeout = $timeout;
        $this->temperature = $temperature;
        $this->top_p = $top_p;
        $this->top_k = $top_k;
        $this->min_p = $min_p;
        $this->think = $think;
        $this->language = $language;
    }

    /**
     * Process text with the prompt template named after the action
     *
     * Adds 'text', 'think' and 'action' to the metadata, renames the
     * 'template', 'examples' and 'previous' entries to 'page_template',
     * 'page_examples' and 'page_previous', loads the prompt page for the
     * action and sends it to the API.
     *
     * @param string $action     The action name; also the name of the prompt page to load
     * @param string $text       The text to process
     * @param array  $metadata   Optional metadata (template, examples, previous, ...)
     * @param bool   $useContext Whether to include context information in the request (default: true)
     * @return string The response content from the LLM
     * @throws \Exception If the prompt page cannot be loaded or the API request fails
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Add text, think and action to metadata
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Move page references out of the way so that the {template}, {examples}
        // and {previous} placeholders can be filled with page *content* later on
        foreach (['template', 'examples', 'previous'] as $key) {
            if (isset($metadata[$key])) {
                $metadata['page_' . $key] = $metadata[$key];
                unset($metadata[$key]);
            }
        }

        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends the custom prompt found in $metadata['prompt'] to the LLM along
     * with the provided text.
     *
     * @param string $text       The text to process
     * @param array  $metadata   Metadata; 'prompt' holds the custom prompt, and may also
     *                           contain template, examples and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     * @throws \Exception If the API request fails
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // A missing prompt is treated as empty instead of raising an undefined-key warning
        $customPrompt = isset($metadata['prompt']) ? (string) $metadata['prompt'] : '';

        // Format the prompt with the text and custom prompt
        $prompt = $customPrompt . "\n\nText to process:\n" . $text;

        return $this->callAPI('custom', $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing.
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                            ]
                        ],
                        'required' => ['id']
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'language' => [
                                'type' => 'string',
                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
                                'default' => 'ro'
                            ]
                        ]
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5
                            ]
                        ]
                    ]
                ]
            ]
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Builds a conversation consisting of the system prompt (loaded from the
     * prompt pages) and a user message, then hands the request to
     * callAPIWithTools().
     *
     * When $useContext is true and the metadata carries 'template', 'examples'
     * or 'snippets', a <context> block is PREPENDED to the user prompt:
     * - Template content: content of the page named by 'template'
     * - Example pages: full content of each page listed in 'examples'
     * - Text snippets: the strings listed in 'snippets'
     *
     * Sampling parameters (temperature, top_p, top_k, min_p) are only sent
     * when they are not null.
     *
     * @param string $command    The command name for loading command-specific system prompts
     * @param string $prompt     The prompt to send to the LLM as user message
     * @param array  $metadata   Optional metadata containing template, examples, and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @param bool   $useTools   Whether to offer the tools from getAvailableTools() to the LLM (default: false)
     * @return string The response content from the LLM
     * @throws \Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadSystemPrompt($command, []);

        // Enhance the prompt with context information from metadata
        // This provides the LLM with additional context about templates and examples
        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
            $contextInfo = "\n\n<context>\n";

            // Add template content if specified in metadata
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Add example pages content if specified in metadata
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                // Cast so that a single page ID given as a string is handled too
                foreach ((array) $metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Add text snippets if specified in metadata
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                $snippetNumber = 0;
                foreach ((array) $metadata['snippets'] as $snippet) {
                    $snippetNumber++;
                    $snippetsContent[] = "\n<example id=\"" . $snippetNumber . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // Prepend the context information to the user prompt
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        // Prepare API request data with model parameters
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt]
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            // Honour the configured flag instead of always forcing thinking on
            'think' => (bool) $this->think
        ];

        // Add tools to the request only if useTools is true
        if ($useTools) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Only add sampling parameters that are defined
        if ($this->temperature !== null) {
            $data['temperature'] = $this->temperature;
        }
        if ($this->top_p !== null) {
            $data['top_p'] = $this->top_p;
        }
        if ($this->top_k !== null) {
            $data['top_k'] = $this->top_k;
        }
        if ($this->min_p !== null) {
            $data['min_p'] = $this->min_p;
        }

        return $this->callAPIWithTools($data, false, $useTools);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Processes a single tool call made by the LLM and returns the tool
     * message to send back. Results are cached per tool name + arguments so
     * that identical calls are not executed twice.
     *
     * The returned array carries an extra 'cached' flag for the caller's
     * information; it is not part of the cached entry.
     *
     * NOTE(review): get_document reads any page the LLM names without an ACL
     * check - confirm this is acceptable for the deployment.
     *
     * @param array $toolCall The tool call data from the LLM
     * @return array The tool response message (role, tool_call_id, content, cached)
     */
    private function handleToolCall($toolCall)
    {
        $toolName = isset($toolCall['function']['name']) ? $toolCall['function']['name'] : '';
        $toolCallId = isset($toolCall['id']) ? $toolCall['id'] : null;

        // Arguments usually arrive as a JSON string, but may already be decoded;
        // anything unusable becomes an empty argument list
        $rawArguments = isset($toolCall['function']['arguments']) ? $toolCall['function']['arguments'] : null;
        if (is_array($rawArguments)) {
            $arguments = $rawArguments;
        } elseif (is_string($rawArguments)) {
            $arguments = json_decode($rawArguments, true);
            if (!is_array($arguments)) {
                $arguments = [];
            }
        } else {
            $arguments = [];
        }

        // Create a cache key from the tool name and arguments
        $cacheKey = md5($toolName . serialize($arguments));

        // Check if we have a cached result for this tool call
        if (isset($this->toolCallCache[$cacheKey])) {
            $toolResponse = $this->toolCallCache[$cacheKey];
            // The call ID differs per call, so it is filled in fresh
            $toolResponse['tool_call_id'] = $toolCallId;
            $toolResponse['cached'] = true;
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCallId,
            'cached' => false
        ];

        switch ($toolName) {
            case 'get_document':
                if (!isset($arguments['id']) || $arguments['id'] === '') {
                    $toolResponse['content'] = 'Missing required argument: id';
                    break;
                }
                $documentId = $arguments['id'];
                $content = $this->getPageContent($documentId);
                if ($content === false) {
                    $toolResponse['content'] = 'Document not found: ' . $documentId;
                } else {
                    $toolResponse['content'] = $content;
                }
                break;

            case 'get_template':
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                $count = isset($arguments['count']) ? (int) $arguments['count'] : 5;
                // Keep the count inside the range advertised in the tool definition
                $count = max(1, min(20, $count));
                // Double quotes: "\n" must be a real newline, not a literal backslash-n
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        // Cache the result without the per-call fields
        $cacheEntry = $toolResponse;
        unset($cacheEntry['tool_call_id']);
        unset($cacheEntry['cached']);
        $this->toolCallCache[$cacheKey] = $cacheEntry;

        return $toolResponse;
    }

    /**
     * Send a request to the API and resolve tool calls until content is returned
     *
     * Performs the HTTP POST (with a Bearer token when an API key is set),
     * then:
     * - if the response contains tool calls (and tools are still allowed),
     *   executes them via handleToolCall(), appends the assistant message and
     *   the tool responses to the conversation, and calls itself again;
     * - otherwise returns the trimmed message content.
     *
     * Loop protection:
     * - once any single tool has been called more than 3 times, or the summed
     *   count exceeds 10, the next request is sent without tools;
     * - once tools have been removed, tool calls in a response are no longer
     *   followed, so the recursion always terminates.
     *
     * @param array $data        The API request data including messages
     * @param bool  $toolsCalled Whether tools must be withheld from this request (loop protection)
     * @param bool  $useTools    Whether to process tool calls
     * @return string The final response content
     * @throws \Exception If the request fails, returns a non-200 code or an unexpected format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // Set up HTTP headers, including authentication if API key is configured
        $headers = [
            'Content-Type: application/json'
        ];

        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // When tools are withheld, drop every tool-related request field
        if ($toolsCalled) {
            unset($data['tools']);
            unset($data['tool_choice']);
            unset($data['parallel_tool_calls']);
        }

        $payload = json_encode($data);
        if ($payload === false) {
            throw new \Exception('API request failed: could not encode request as JSON');
        }

        // Initialize and configure cURL for the API request
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        if ($this->timeout !== null) {
            curl_setopt($ch, CURLOPT_TIMEOUT, (int) $this->timeout);
        }
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        // Execute the API request
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Handle cURL errors
        if ($error) {
            throw new \Exception('API request failed: ' . $error);
        }

        // Handle HTTP errors
        if ($httpCode !== 200) {
            throw new \Exception('API request failed with HTTP code: ' . $httpCode);
        }

        // Parse the JSON response; anything unparsable ends in the format exception below
        $result = json_decode($response, true);
        $message = (is_array($result) && isset($result['choices'][0]['message']) && is_array($result['choices'][0]['message']))
            ? $result['choices'][0]['message']
            : [];
        $toolCalls = (isset($message['tool_calls']) && is_array($message['tool_calls']))
            ? $message['tool_calls']
            : [];

        // Tool calls take precedence over content: a response may carry an empty
        // content string next to its tool calls
        if ($useTools && !$toolsCalled && !empty($toolCalls)) {
            // Start with original messages
            $messages = $data['messages'];

            // Add assistant's message with tool calls, keeping all fields except content
            $assistantMessage = $message;
            unset($assistantMessage['content']);
            $messages[] = $assistantMessage;

            // Process each tool call and track counts to prevent infinite loops
            foreach ($toolCalls as $toolCall) {
                $toolName = isset($toolCall['function']['name']) ? $toolCall['function']['name'] : '';
                if (!isset($this->toolCallCounts[$toolName])) {
                    $this->toolCallCounts[$toolName] = 0;
                }
                $this->toolCallCounts[$toolName]++;

                $toolResponse = $this->handleToolCall($toolCall);
                // 'cached' is internal bookkeeping, not part of the API message
                unset($toolResponse['cached']);
                $messages[] = $toolResponse;
            }

            // Disable tools if any tool was called more than 3 times ...
            $toolsCalledCount = 0;
            foreach ($this->toolCallCounts as $count) {
                if ($count > 3) {
                    $toolsCalled = true;
                    break;
                }
                $toolsCalledCount += $count;
            }

            // ... or if the total number of tool calls exceeds 10
            if ($toolsCalledCount > 10) {
                $toolsCalled = true;
            }

            // Make another API call with tool responses
            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
        }

        // Extract the content from the response if available
        if (isset($message['content'])) {
            // Reset tool call counts when we get final content
            $this->toolCallCounts = [];
            return trim($message['content']);
        }

        // Throw exception for unexpected response format
        throw new \Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:prompts:LANGUAGE:PROMPT_NAME
     *
     * Language fallback:
     * 1. First tries to load the prompt in the configured language
     * 2. If not found, falls back to English prompts
     * 3. Throws an exception if neither is available
     *
     * After loading, placeholders ({name}) found in the prompt but missing
     * from $variables are filled in: 'template', 'snippets', 'examples' and
     * 'previous' get generated content, everything else becomes an empty
     * string. All placeholders are then replaced in a single pass, so text
     * inserted for one placeholder is never scanned for further placeholders.
     * Non-scalar variables (e.g. the 'page_examples' list) are not substituted.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array  $variables  Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws \Exception If the prompt page cannot be loaded in any language
     */
    private function loadPrompt($promptName, $variables = [])
    {
        // Default to 'en' if language is 'default' or not set
        if ($this->language === 'default' || empty($this->language)) {
            $this->language = 'en';
        }

        // Try the prompt page in the configured language first
        $promptPageId = 'dokullm:prompts:' . $this->language . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // If the language-specific prompt doesn't exist, try English as fallback
        if ($prompt === false && $this->language !== 'en') {
            $promptPageId = 'dokullm:prompts:en:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        // If still no prompt found, throw an exception
        if ($prompt === false) {
            throw new \Exception('Prompt page not found: ' . $promptPageId);
        }

        // Page references may be absent; use neutral defaults instead of undefined keys
        $pageTemplate = isset($variables['page_template']) ? $variables['page_template'] : null;
        $pageExamples = isset($variables['page_examples']) ? $variables['page_examples'] : [];
        $pagePrevious = isset($variables['page_previous']) ? $variables['page_previous'] : '';

        // Add missing placeholders with appropriate values
        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // Skip if already provided in variables
            if (isset($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    $variables[$placeholder] = $this->getTemplateContent($pageTemplate);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->getSnippets(10);
                    break;

                case 'examples':
                    $variables[$placeholder] = $this->getExamplesContent($pageExamples);
                    break;

                case 'previous':
                    $variables[$placeholder] = $this->getPreviousContent($pagePrevious);

                    // Add current and previous dates alongside the previous report
                    $variables['current_date'] = $this->getPageDate();
                    $variables['previous_date'] = !empty($pagePrevious) ?
                        $this->getPageDate($pagePrevious) :
                        '';
                    break;

                default:
                    // Unknown placeholders are removed from the prompt
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Build the {placeholder} => value map; arrays/objects cannot be substituted
        $replacements = [];
        foreach ($variables as $placeholder => $value) {
            if ($value === null || is_scalar($value)) {
                $replacements['{' . $placeholder . '}'] = (string) $value;
            }
        }

        // Single-pass replacement of all placeholders
        if (!empty($replacements)) {
            $prompt = strtr($prompt, $replacements);
        }

        return $prompt;
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the main 'system' prompt and appends the prompt named
     * ACTION:system when such a page exists.
     *
     * @param string $action    The action/command name
     * @param array  $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws \Exception If the main system prompt page cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadPrompt('system', $variables);

        // Check if there's a command-specific system prompt appendage
        if (!empty($action)) {
            try {
                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
                if ($commandSystemPrompt !== false) {
                    $systemPrompt .= "\n" . $commandSystemPrompt;
                }
            } catch (\Exception $e) {
                // A missing command-specific prompt is expected; the main
                // system prompt is used on its own in that case
            }
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Retrieves the raw content of a DokuWiki page by its ID.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // Convert page ID to file path
        $pageFile = wikiFN($pageId);

        // Check if file exists and is readable
        if (file_exists($pageFile) && is_readable($pageFile)) {
            return file_get_contents($pageFile);
        }

        return false;
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Looks for a standalone group of exactly six digits (YYmmdd) in the page
     * ID and returns it as YYYY-MM-DD when it is a valid calendar date.
     * Otherwise the file's last modification time is used.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD), or '' if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        global $ID;

        // Use provided page ID or current page ID
        $targetPageId = $pageId ?: $ID;

        // Digit boundaries keep longer digit runs (e.g. YYYYmmdd) from being misread
        if (preg_match('/(?<!\d)(\d{2})(\d{2})(\d{2})(?!\d)/', (string) $targetPageId, $matches)) {
            $year = $matches[1];
            $month = $matches[2];
            $day = $matches[3];

            // Assume 20xx for years 00-69, 19xx for years 70-99
            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;

            // Only accept real calendar dates; otherwise fall through to the timestamp
            if (checkdate((int) $month, (int) $day, (int) $fullYear)) {
                return $fullYear . '-' . $month . '-' . $day;
            }
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            $timestamp = filemtime($pageFile);
            return date('Y-m-d', $timestamp);
        }

        // Return empty string if no date can be determined
        return '';
    }

    /**
     * Get current text
     *
     * Returns the text stored by process() / processCustomPrompt().
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns an array of unique placeholder names.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text
     */
    public function findPlaceholders($text)
    {
        $placeholders = [];
        $pattern = '/\{([^}]+)\}/';

        if (preg_match_all($pattern, $text, $matches)) {
            // Get unique placeholder names
            $placeholders = array_unique($matches[1]);
        }

        return $placeholders;
    }

    /**
     * Get template content for the current text
     *
     * If a page ID is provided and readable, its content is returned.
     * Otherwise ChromaDB is queried for a template matching the current text.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content, or '( no template )' if none is found
     */
    private function getTemplateContent($pageId = null)
    {
        // If pageId is provided, use it directly
        if (!empty($pageId)) {
            $templateContent = $this->getPageContent($pageId);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        // Otherwise, get template suggestion for the current text
        $templateIds = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($templateIds)) {
            $templateContent = $this->getPageContent($templateIds[0]);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Queries ChromaDB for snippets similar to the current text and wraps
     * each one in a numbered <example> element.
     *
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets content, or '( no examples )' if none are found
     */
    private function getSnippets($count = 10)
    {
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            // Double quotes: "\n" must be a real newline, not a literal backslash-n
            $formattedSnippets[] = '<example id="' . ($index + 1) . "\">\n" . $snippet . "\n</example>";
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Returns the content of each readable page wrapped in an
     * <example_page> element.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples content, or '( no examples )' if nothing could be loaded
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content !== false) {
                // Double quotes: "\n" must be a real newline, not a literal backslash-n
                $examplesContent[] = '<example_page source="' . $exampleId . "\">\n" . $content . "\n</example_page>";
            }
        }

        // None of the listed pages was readable
        if (empty($examplesContent)) {
            return '( no examples )';
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * @param string $previousId Previous page ID
     * @return string Previous report content or a default message if not given/found
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);
        if ($content !== false) {
            return $content;
        }

        return '( previous report not found )';
    }

    /**
     * Read a dokullm plugin configuration value
     *
     * This class is not a DokuWiki plugin component, so it has no inherited
     * getConf(); the value is read from the global configuration array.
     *
     * NOTE(review): assumes the plugin's settings (including defaults) have
     * already been merged into $conf['plugin']['dokullm'] by a plugin
     * component before this client is used - confirm against the caller.
     *
     * @param string $key     The configuration key
     * @param mixed  $default Value returned when the key is not set
     * @return mixed The configuration value or $default
     */
    private function getConf($key, $default = null)
    {
        global $conf;

        return isset($conf['plugin']['dokullm'][$key]) ? $conf['plugin']['dokullm'][$key] : $default;
    }

    /**
     * Get ChromaDB client with configuration
     *
     * Creates a ChromaDB client from the plugin configuration and determines
     * the collection name: the first namespace part of the current page ID,
     * or the configured default collection when there is no page ID or the
     * first part is 'playground'.
     *
     * @return array Array containing the ChromaDB client and collection name
     */
    private function getChromaDBClient()
    {
        global $ID;

        // Get ChromaDB configuration from DokuWiki plugin configuration
        $chromaHost = $this->getConf('chroma_host');
        $chromaPort = $this->getConf('chroma_port');
        $chromaTenant = $this->getConf('chroma_tenant');
        $chromaDatabase = $this->getConf('chroma_database');
        $chromaDefaultCollection = $this->getConf('chroma_collection');
        $ollamaHost = $this->getConf('ollama_host');
        $ollamaPort = $this->getConf('ollama_port');
        $ollamaModel = $this->getConf('ollama_embeddings_model');

        // Use the first part of the current page ID as collection name, fallback to default
        $chromaCollection = $chromaDefaultCollection;

        if (!empty($ID)) {
            $parts = explode(':', $ID);
            if (!empty($parts[0]) && $parts[0] !== 'playground') {
                $chromaCollection = $parts[0];
            }
        }

        // Create ChromaDB client with all required parameters
        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $chromaHost,
            $chromaPort,
            $chromaTenant,
            $chromaDatabase,
            $ollamaHost,
            $ollamaPort,
            $ollamaModel
        );

        return [$chromaClient, $chromaCollection];
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Queries the collection chosen by getChromaDBClient() for documents
     * similar to the text and returns their IDs. Failures are logged and
     * result in an empty list.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // The ChromaDB IDs are used as-is, without conversion
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                return array_values($results['ids'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Same query as queryChromaDB(), but returns the document texts instead
     * of the IDs. Failures are logged and result in an empty list.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                return array_values($results['documents'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries ChromaDB for a single document whose metadata matches
     * type = 'template' and strips a trailing chunk suffix from its ID.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }
}