<?php
namespace dokuwiki\plugin\dokullm;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}

/**
 * LLM Client class for handling API communications
 *
 * Holds the connection/sampling settings handed to the constructor and
 * exposes text processing operations that are sent to an LLM chat API.
 * Tool call results are cached per instance to avoid duplicate processing.
 */
class LlmClient
{
    /** @var string The API endpoint URL */
    private $api_url;

    /** @var array Cache for tool call results, keyed by md5(tool name + serialized arguments) */
    private $toolCallCache = [];

    /** @var string Current text for tool usage */
    private $currentText = '';

    /** @var array Track tool call counts (tool name => count) to prevent infinite loops */
    private $toolCallCounts = [];

    /** @var string The API authentication key */
    private $api_key;

    /** @var string The model identifier to use */
    private $model;

    /** @var int The request timeout in seconds */
    private $timeout;

    /** @var float The temperature setting for response randomness */
    private $temperature;

    /** @var float The top-p setting for nucleus sampling */
    private $top_p;

    /** @var int The top-k setting for token selection */
    private $top_k;

    /** @var float The min-p setting for minimum probability threshold */
    private $min_p;

    /** @var bool Whether to enable thinking in LLM responses */
    private $think;

    /** @var string|null Profile used to locate prompt template pages */
    private $profile;

    /** @var object|null ChromaDB client instance */
    private $chromaClient;

    /** @var string|null Page ID */
    private $pageId;

    /**
     * Initialize the LLM client with configuration settings
     *
     * All values are supplied by the caller; this constructor only stores them.
     *
     * @param string|null $api_url      The LLM API endpoint URL
     * @param string|null $api_key      Authentication key for the API (optional)
     * @param string|null $model        The model identifier to use for requests
     * @param int|null    $timeout      Request timeout in seconds
     * @param float|null  $temperature  Temperature setting; omitted from requests when null
     * @param float|null  $top_p        Top-p (nucleus sampling) setting; omitted from requests when null
     * @param int|null    $top_k        Top-k setting; omitted from requests when null
     * @param float|null  $min_p        Minimum probability threshold; omitted from requests when null
     * @param bool|null   $think        Whether to enable thinking in LLM responses
     * @param string|null $profile      Profile for prompt templates ('default' is used when empty)
     * @param object|null $chromaClient ChromaDB client instance (optional)
     * @param string|null $pageId       Page ID (optional)
     */
    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $profile = null, $chromaClient = null, $pageId = null)
    {
        $this->api_url = $api_url;
        $this->api_key = $api_key;
        $this->model = $model;
        $this->timeout = $timeout;
        $this->temperature = $temperature;
        $this->top_p = $top_p;
        $this->top_k = $top_k;
        $this->min_p = $min_p;
        $this->think = $think;
        $this->profile = $profile;
        $this->chromaClient = $chromaClient;
        $this->pageId = $pageId;
    }

    /**
     * Process text with the prompt template that belongs to an action
     *
     * Stores the text for later tool usage, enriches the metadata with the
     * text, the think switch and the action name, loads the prompt page named
     * after the action and sends it to the API.
     *
     * The metadata keys 'template', 'examples' and 'previous' are renamed to
     * 'page_template', 'page_examples' and 'page_previous' so that the
     * {template}, {examples} and {previous} placeholders can be filled with
     * page content instead of page IDs.
     *
     * NOTE(review): because of that rename, callAPI() no longer sees
     * 'template'/'examples' on this path; only 'snippets' can still trigger
     * its context block. Confirm this is intended.
     *
     * @param string $action The action name; also the name of the prompt page
     * @param string $text The text to process
     * @param array $metadata Optional metadata (template, examples, previous, ...)
     * @param bool $useContext Whether callAPI() may add context to the prompt (default: true)
     * @return string The response content from the LLM
     * @throws \Exception If the prompt cannot be loaded or the API request fails
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Add text, think and action to metadata
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Move page references out of the way of the identically named placeholders
        foreach (['template', 'examples', 'previous'] as $key) {
            if (isset($metadata[$key])) {
                $metadata['page_' . $key] = $metadata[$key];
                unset($metadata[$key]);
            }
        }

        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends the custom prompt found in $metadata['prompt'] to the LLM along
     * with the provided text. A missing prompt is treated as an empty string.
     *
     * @param string $text The text to process
     * @param array $metadata Metadata; 'prompt' holds the custom prompt, and
     *                        template/examples/snippets may be used as context
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     * @throws \Exception If the system prompt cannot be loaded or the API request fails
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Format the prompt with the text and custom prompt
        $customPrompt = isset($metadata['prompt']) ? $metadata['prompt'] : '';
        $prompt = $customPrompt . "\n\nText to process:\n" . $text;

        return $this->callAPI('custom', $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing.
     * The names must match the cases handled in handleToolCall().
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                            ]
                        ],
                        'required' => ['id']
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'type' => [
                                'type' => 'string',
                                'description' => 'The type of the template (e.g., "mri" for MRI reports, "daily" for daily reports).',
                                'default' => ''
                            ]
                        ]
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5
                            ]
                        ]
                    ]
                ]
            ]
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Builds a conversation consisting of the system prompt (loaded from the
     * prompt pages) and the user prompt, optionally prefixed with a <context>
     * block, and hands the request to callAPIWithTools().
     *
     * The context block is prepended to the USER prompt when $useContext is
     * true and the metadata contains at least one of:
     * - template: page ID whose content is used as a starting point
     * - examples: list of page IDs whose full content is included
     * - snippets: list of text snippets (e.g. from ChromaDB)
     *
     * Sampling parameters (temperature, top_p, top_k, min_p) are only added
     * to the request when they are not null.
     *
     * @param string $command The command name for loading command-specific system prompts
     * @param string $prompt The prompt to send to the LLM as user message
     * @param array $metadata Optional metadata containing template, examples, and snippets
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @param bool $useTools Whether to offer the tools from getAvailableTools() to the LLM (default: false)
     * @return string The response content from the LLM
     * @throws \Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadSystemPrompt($command, []);

        // Enhance the prompt with context information from metadata
        // This provides the LLM with additional context about templates and examples
        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
            $contextInfo = "\n\n<context>\n";

            // Add template content if specified in metadata
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Add example pages content if specified in metadata
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                // Cast so a single page ID given as a string is handled like a one-element list
                foreach ((array)$metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Add text snippets if specified in metadata
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                foreach ((array)$metadata['snippets'] as $index => $snippet) {
                    // These are text snippets from ChromaDB
                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // Prepend the context information to the user prompt
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        // Prepare API request data with model parameters
        // NOTE(review): 'think' is hard-coded to true and ignores $this->think; confirm intent
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt]
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            'think' => true
        ];

        // Add tools to the request only if useTools is true
        if ($useTools) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Only add parameters if they are defined and not null
        if ($this->temperature !== null) {
            $data['temperature'] = $this->temperature;
        }
        if ($this->top_p !== null) {
            $data['top_p'] = $this->top_p;
        }
        if ($this->top_k !== null) {
            $data['top_k'] = $this->top_k;
        }
        if ($this->min_p !== null) {
            $data['min_p'] = $this->min_p;
        }

        // Send the request; tool calls in the response (if any) are resolved there
        return $this->callAPIWithTools($data, false);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Processes a single tool call made by the LLM and returns the matching
     * tool message. Results are cached by tool name and arguments, so an
     * identical call is answered from the cache.
     *
     * The returned array carries an extra 'cached' flag that is meant for the
     * caller only; it is not stored in the cache.
     *
     * @param array $toolCall The tool call data from the LLM
     * @return array The tool response message (role, tool_call_id, content, cached)
     */
    private function handleToolCall($toolCall)
    {
        $toolName = $toolCall['function']['name'];
        $arguments = json_decode($toolCall['function']['arguments'], true);
        if (!is_array($arguments)) {
            // Malformed or empty argument JSON: treat as "no arguments"
            $arguments = [];
        }

        // Create a cache key from the tool name and arguments
        $cacheKey = md5($toolName . serialize($arguments));

        // Check if we have a cached result for this tool call
        if (isset($this->toolCallCache[$cacheKey])) {
            $toolResponse = $this->toolCallCache[$cacheKey];
            // The call ID differs per call, so it is filled in here
            $toolResponse['tool_call_id'] = $toolCall['id'];
            $toolResponse['cached'] = true;
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCall['id'],
            'cached' => false // Indicate this is a fresh response
        ];

        switch ($toolName) {
            case 'get_document':
                $documentId = isset($arguments['id']) ? (string)$arguments['id'] : '';
                $content = $this->getPageContent($documentId);
                if ($content === false) {
                    $toolResponse['content'] = 'Document not found: ' . $documentId;
                } else {
                    $toolResponse['content'] = $content;
                }
                break;

            case 'get_template':
                // Template is chosen for the current text; the 'type' argument is not used
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
                // Keep the count inside the 1-20 range advertised in the tool definition
                $count = max(1, min(20, $count));
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        // Cache the result for future calls with the same parameters
        $cacheEntry = $toolResponse;
        // Remove tool_call_id and cached flag from cache as they change per call
        unset($cacheEntry['tool_call_id'], $cacheEntry['cached']);
        $this->toolCallCache[$cacheKey] = $cacheEntry;

        return $toolResponse;
    }

    /**
     * Send a request to the API and resolve tool calls until content is returned
     *
     * Performs the HTTP POST with cURL (adding a Bearer token when an API key
     * is configured). If the response contains final content it is returned.
     * If it contains tool calls, each call is answered via handleToolCall(),
     * the answers are appended to the conversation and the method calls
     * itself again.
     *
     * Loop protection:
     * - per tool call counts are tracked in $this->toolCallCounts
     * - once any tool was called more than 3 times, or more than 10 calls
     *   were counted in total, the follow-up request is sent without tools
     * - the counts are reset when final content is returned
     *
     * @param array $data The API request data including messages
     * @param bool $toolsCalled When true, tool related fields are stripped from the request
     * @param bool $useTools Whether tool calls in the response are processed
     * @return string The final response content
     * @throws \Exception On cURL errors, non-200 HTTP codes or an unexpected response format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // Set up HTTP headers, including authentication if API key is configured
        $headers = [
            'Content-Type: application/json'
        ];

        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // Once the limits were hit, stop offering tools. parallel_tool_calls is removed
        // as well because it is meaningless (and may be rejected) without 'tools'.
        if ($toolsCalled) {
            unset($data['tools'], $data['tool_choice'], $data['parallel_tool_calls']);
        }

        // Initialize and configure cURL for the API request
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, (int)$this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        // Execute the API request
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Handle cURL errors
        if ($error) {
            throw new \Exception('API request failed: ' . $error);
        }

        // Handle HTTP errors
        if ($httpCode !== 200) {
            throw new \Exception('API request failed with HTTP code: ' . $httpCode);
        }

        // Parse the JSON response; anything that is not the expected structure
        // ends in the "unexpected format" exception at the bottom
        $result = json_decode($response, true);
        $message = [];
        if (is_array($result) && isset($result['choices'][0]['message']) && is_array($result['choices'][0]['message'])) {
            $message = $result['choices'][0]['message'];
        }

        $hasToolCalls = $useTools && isset($message['tool_calls']);

        // Extract the content from the response if available
        if (isset($message['content'])) {
            $content = trim($message['content']);

            // Some backends send content "" next to tool_calls instead of null. While tools
            // are still enabled this means "run the tools", not "the answer is empty".
            $deferToTools = $content === '' && $hasToolCalls && !$toolsCalled && !empty($message['tool_calls']);

            if (!$deferToTools) {
                // Reset tool call counts when we get final content
                $this->toolCallCounts = [];
                return $content;
            }
        }

        // Handle tool calls if present
        if ($hasToolCalls) {
            $toolCalls = $message['tool_calls'];

            // Start with original messages and add the assistant's message with
            // tool calls, keeping all original fields except for content
            $messages = $data['messages'];
            $assistantMessage = $message;
            unset($assistantMessage['content']);
            $messages[] = $assistantMessage;

            // Process each tool call and track counts to prevent infinite loops
            foreach ($toolCalls as $toolCall) {
                $toolName = $toolCall['function']['name'];
                if (!isset($this->toolCallCounts[$toolName])) {
                    $this->toolCallCounts[$toolName] = 0;
                }
                $this->toolCallCounts[$toolName]++;

                $toolResponse = $this->handleToolCall($toolCall);
                // 'cached' is internal bookkeeping and not part of a chat message
                unset($toolResponse['cached']);
                $messages[] = $toolResponse;
            }

            // Check if any tool has been called more than 3 times
            $toolsCalledCount = 0;
            foreach ($this->toolCallCounts as $count) {
                if ($count > 3) {
                    // If any tool called more than 3 times, disable tools to break loop
                    $toolsCalled = true;
                    break;
                }
                $toolsCalledCount += $count;
            }

            // If total tool calls exceed 10, also disable tools
            if ($toolsCalledCount > 10) {
                $toolsCalled = true;
            }

            // Make another API call with tool responses
            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
        }

        // Throw exception for unexpected response format
        throw new \Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:profiles:PROFILE:PROMPT_NAME
     *
     * The method implements a profile fallback mechanism:
     * 1. First tries to load the prompt from the configured profile
     * 2. If not found, falls back to the 'default' profile
     * 3. Throws an exception if neither is available
     *
     * After loading the prompt, it scans for placeholders and fills in the
     * ones that were not provided:
     * - {template}: content from getTemplateContent()
     * - {snippets}: content from getSnippets() when a ChromaDB client exists
     * - {examples}: content of the pages listed in 'page_examples'
     * - {previous}: content of the 'page_previous' page; this also sets
     *   'current_date' and 'previous_date'
     * - anything else: empty string
     *
     * Values that cannot be represented as a string (arrays, plain objects)
     * are not substituted.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array $variables Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws \Exception If the prompt page cannot be loaded from any profile
     */
    private function loadPrompt($promptName, $variables = [])
    {
        // Default to 'default' if profile is not set
        if (empty($this->profile)) {
            $this->profile = 'default';
        }

        // Construct the page ID for the prompt in the configured profile
        $promptPageId = 'dokullm:profiles:' . $this->profile . ':' . $promptName;

        // Try to get the content of the prompt page in the configured profile
        $prompt = $this->getPageContent($promptPageId);

        // If the profile-specific prompt doesn't exist, try default as fallback
        if ($prompt === false && $this->profile !== 'default') {
            $promptPageId = 'dokullm:profiles:default:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        // If still no prompt found, throw an exception
        if ($prompt === false) {
            throw new \Exception('Prompt page not found: ' . $promptPageId);
        }

        // Page references are optional; read them once without triggering undefined key warnings
        $pageTemplate = isset($variables['page_template']) ? $variables['page_template'] : null;
        $pageExamples = isset($variables['page_examples']) ? $variables['page_examples'] : [];
        $pagePrevious = isset($variables['page_previous']) ? $variables['page_previous'] : '';

        // Add missing placeholders with appropriate values
        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // Skip if already provided in variables
            if (isset($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    // Uses the referenced page when given, otherwise a suggested template
                    $variables[$placeholder] = $this->getTemplateContent($pageTemplate);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->chromaClient !== null ? $this->getSnippets(10) : '( no examples )';
                    break;

                case 'examples':
                    $variables[$placeholder] = $this->getExamplesContent($pageExamples);
                    break;

                case 'previous':
                    $variables[$placeholder] = $this->getPreviousContent($pagePrevious);

                    // Add current and previous dates to the variables
                    $variables['current_date'] = $this->getPageDate($this->pageId);
                    $variables['previous_date'] = !empty($pagePrevious) ? $this->getPageDate($pagePrevious) : '';
                    break;

                default:
                    // Unknown placeholders are replaced with an empty string
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Replace placeholders with actual values
        // Placeholders are in the format {placeholder_name}
        foreach ($variables as $placeholder => $value) {
            // Arrays (e.g. page_examples) and plain objects have no string form;
            // passing them to str_replace() would raise a TypeError on PHP 8
            if (is_array($value) || (is_object($value) && !method_exists($value, '__toString'))) {
                continue;
            }
            $prompt = str_replace('{' . $placeholder . '}', (string)$value, $prompt);
        }

        return $prompt;
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the 'system' prompt and appends the prompt named
     * ACTION:system when such a page exists. A missing command-specific
     * page is not an error.
     *
     * @param string $action The action/command name
     * @param array $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws \Exception If the main system prompt cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadPrompt('system', $variables);

        // Check if there's a command-specific system prompt appendage
        if (!empty($action)) {
            try {
                $systemPrompt .= "\n" . $this->loadPrompt($action . ':system', $variables);
            } catch (\Exception $e) {
                // Deliberately ignored: the command-specific prompt is optional and
                // the main system prompt is still usable without it
            }
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Retrieves the raw content of a DokuWiki page by its ID.
     * Used for loading prompt, template and example page content.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // Convert page ID to file path
        $pageFile = wikiFN($pageId);

        // Check if file exists and is readable
        if (file_exists($pageFile) && is_readable($pageFile)) {
            return file_get_contents($pageFile);
        }

        return false;
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Uses the first run of six digits in the page ID, read as YYmmdd.
     * If there is none, uses the file's last modification timestamp.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD) or empty string if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        // Use provided page ID or current page ID
        $targetPageId = $pageId ?: $this->pageId;

        // Without any page ID there is nothing to derive a date from
        if (empty($targetPageId)) {
            return '';
        }

        // Try to extract date from page ID (looking for YYmmdd pattern)
        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
            $year = $matches[1];
            $month = $matches[2];
            $day = $matches[3];

            // Assume 20xx for years 00-69, 19xx for years 70-99
            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;

            return $fullYear . '-' . $month . '-' . $day;
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            $timestamp = filemtime($pageFile);
            return date('Y-m-d', $timestamp);
        }

        // Return empty string if no date can be determined
        return '';
    }

    /**
     * Get current text
     *
     * Retrieves the text stored by process() or processCustomPrompt().
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns the unique placeholder names. Keys of the returned array are
     * the positions of the first occurrences and may therefore have gaps.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text
     */
    public function findPlaceholders($text)
    {
        $placeholders = [];
        $pattern = '/\{([^}]+)\}/';

        if (preg_match_all($pattern, $text, $matches)) {
            // Get unique placeholder names
            $placeholders = array_unique($matches[1]);
        }

        return $placeholders;
    }

    /**
     * Get template content for the current text
     *
     * If a pageId is provided and the page is readable, its content is
     * returned. Otherwise ChromaDB (when available) is queried for a
     * template matching the current text.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content or '( no template )' if none is found
     */
    private function getTemplateContent($pageId = null)
    {
        // If pageId is provided, use it directly
        if ($pageId !== null) {
            $templateContent = $this->getPageContent($pageId);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        // Without ChromaDB there is no way to suggest a template
        if ($this->chromaClient === null) {
            return '( no template )';
        }

        // Otherwise, get template suggestion for the current text
        $templateIds = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($templateIds)) {
            $templateContent = $this->getPageContent($templateIds[0]);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Queries ChromaDB for snippets similar to the current text and wraps
     * each one in an <example id="N"> element.
     *
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets or '( no examples )' if none are available
     */
    private function getSnippets($count = 10)
    {
        // Without ChromaDB there are no snippets
        if ($this->chromaClient === null) {
            return '( no examples )';
        }

        // Get example snippets for the current text
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            $formattedSnippets[] = '<example id="' . ($index + 1) . "\">\n" . $snippet . "\n</example>";
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Returns the content of each readable page wrapped in an
     * <example_page source="ID"> element. Unreadable pages are skipped.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples; '( no examples )' when no IDs are given,
     *                empty string when none of the given pages is readable
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content !== false) {
                $examplesContent[] = '<example_page source="' . $exampleId . "\">\n" . $content . "\n</example_page>";
            }
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * @param string $previousId Previous page ID
     * @return string Previous report content, '( no previous report )' when no ID
     *                is given, or '( previous report not found )' when the page is unreadable
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);
        if ($content !== false) {
            return $content;
        }

        return '( previous report not found )';
    }

    /**
     * Get ChromaDB client with the collection name to query
     *
     * The collection name is the first ':'-separated part of the current page
     * ID. Without a page ID the collection is 'reports'.
     *
     * @return array Array containing the ChromaDB client and collection name
     * @throws \Exception If no ChromaDB client is available
     */
    private function getChromaDBClient()
    {
        // This class cannot create a client on its own; one must be injected
        if ($this->chromaClient === null) {
            throw new \Exception('No ChromaDB client available');
        }

        // FIXME (kept from the original): collection selection is provisional
        $chromaCollection = 'reports';

        if (!empty($this->pageId)) {
            // Split the page ID by ':' and take the first part as collection name
            $parts = explode(':', $this->pageId);
            if (isset($parts[0]) && !empty($parts[0])) {
                // NOTE(review): 'playground' maps to an empty collection name, presumably
                // treated as "default collection" by the client; confirm
                $chromaCollection = $parts[0] === 'playground' ? '' : $parts[0];
            }
        }

        return [$this->chromaClient, $chromaCollection];
    }

    /**
     * Run a similarity query and return one field of the first result set
     *
     * Shared implementation of queryChromaDB() and queryChromaDBSnippets().
     * Failures are logged and answered with an empty list so that a broken
     * ChromaDB never aborts the LLM request.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve
     * @param array|null $where Optional filter conditions for metadata
     * @param string $field Result field to return ('ids' or 'documents')
     * @return array Values of the requested field for the first query text
     */
    private function queryChromaDBField($text, $limit, $where, $field)
    {
        try {
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Only one query text is sent, so only the first result set is relevant
            if (isset($results[$field][0]) && is_array($results[$field][0])) {
                return array_values($results[$field][0]);
            }

            return [];
        } catch (\Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Queries the collection chosen by getChromaDBClient() for documents
     * similar to the text and returns their ChromaDB IDs unchanged.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs (empty on failure)
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        return $this->queryChromaDBField($text, $limit, $where, 'ids');
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Same query as queryChromaDB(), but returns the document texts instead
     * of the document IDs.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets (empty on failure)
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        return $this->queryChromaDBField($text, $limit, $where, 'documents');
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries for a single document filtered by metadata type = 'template'
     * and strips a trailing chunk suffix (e.g. "@2") from its ID.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }
}