<?php

namespace dokuwiki\plugin\dokullm;

use Exception;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}

/**
 * LLM Client class for handling API communications
 *
 * Holds the connection/sampling settings passed to the constructor and
 * provides methods for various text processing operations through an LLM API.
 * Tool call results are cached for the duration of a single request to avoid
 * duplicate processing.
 */
class LlmClient
{
    /** @var string The API endpoint URL */
    private $api_url;

    /** @var array Cache for tool call results (reset for every top-level request) */
    private $toolCallCache = [];

    /** @var string Current text for tool usage */
    private $currentText = '';

    /** @var array Track tool call counts to prevent infinite loops */
    private $toolCallCounts = [];

    /** @var string The API authentication key */
    private $api_key;

    /** @var string The model identifier to use */
    private $model;

    /** @var int The request timeout in seconds */
    private $timeout;

    /** @var float The temperature setting for response randomness */
    private $temperature;

    /** @var float The top-p setting for nucleus sampling */
    private $top_p;

    /** @var int The top-k setting for token selection */
    private $top_k;

    /** @var float The min-p setting for minimum probability threshold */
    private $min_p;

    /** @var bool Whether to enable thinking in LLM responses */
    private $think;

    /** @var string|null Profile (namespace) the prompt templates are loaded from */
    private $profile;

    /** @var object|null ChromaDB client instance */
    private $chromaClient;

    /** @var string|null Page ID */
    private $pageId;

    /**
     * Initialize the LLM client with configuration settings
     *
     * All settings are supplied by the caller; nothing is read from the
     * DokuWiki configuration here.
     *
     * @param string|null $api_url      The LLM API endpoint URL
     * @param string|null $api_key      Authentication key for the API (optional)
     * @param string|null $model        The model identifier to use for requests
     * @param int|null    $timeout      Request timeout in seconds
     * @param float|null  $temperature  Temperature setting for response randomness
     * @param float|null  $top_p        Top-p (nucleus sampling) setting
     * @param int|null    $top_k        Top-k setting
     * @param float|null  $min_p        Minimum probability threshold
     * @param bool|null   $think        Whether to enable thinking in LLM responses
     * @param string|null $profile      Profile for prompt templates ('default' when empty)
     * @param object|null $chromaClient ChromaDB client instance (optional)
     * @param string|null $pageId       Page ID (optional)
     */
    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $profile = null, $chromaClient = null, $pageId = null)
    {
        $this->api_url = $api_url;
        $this->api_key = $api_key;
        $this->model = $model;
        $this->timeout = $timeout;
        $this->temperature = $temperature;
        $this->top_p = $top_p;
        $this->top_k = $top_k;
        $this->min_p = $min_p;
        $this->think = $think;
        $this->profile = $profile;
        $this->chromaClient = $chromaClient;
        $this->pageId = $pageId;
    }

    /**
     * Process text with one of the predefined actions
     *
     * Loads the prompt page named after the action, fills its placeholders
     * from the metadata and sends the result to the LLM.
     *
     * The metadata keys 'template', 'examples' and 'previous' hold page IDs;
     * they are renamed to 'page_template', 'page_examples' and 'page_previous'
     * so that the prompt placeholders {template}, {examples} and {previous}
     * can be filled with the page *content* instead of the page IDs.
     *
     * @param string $action     The action name, which is also the prompt page name
     * @param string $text       The text to process
     * @param array  $metadata   Optional metadata (template, examples, previous, ...)
     * @param bool   $useContext Whether to include context information in the request (default: true)
     * @return string The processed text
     * @throws Exception If the prompt cannot be loaded or the API request fails
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Add text, think and action to metadata
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Move the page ID entries out of the way of the content placeholders
        foreach (['template', 'examples', 'previous'] as $key) {
            if (isset($metadata[$key])) {
                $metadata['page_' . $key] = $metadata[$key];
                unset($metadata[$key]);
            }
        }

        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends the custom prompt found in $metadata['prompt'] to the LLM along
     * with the provided text.
     *
     * @param string $text       The text to process
     * @param array  $metadata   Metadata; 'prompt' holds the custom prompt, 'template',
     *                           'examples' and 'snippets' are used as context
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     * @throws Exception If the API request fails
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // A missing custom prompt degrades to an empty instruction instead of a warning
        $customPrompt = isset($metadata['prompt']) ? $metadata['prompt'] : '';

        // Format the prompt with the text and custom prompt
        $prompt = $customPrompt . "\n\nText to process:\n" . $text;

        return $this->callAPI('custom', $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing.
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.',
                            ],
                        ],
                        'required' => ['id'],
                    ],
                ],
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'type' => [
                                'type' => 'string',
                                'description' => 'The type of the template (e.g., "mri" for MRI reports, "daily" for daily reports).',
                                'default' => '',
                            ],
                        ],
                    ],
                ],
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5,
                            ],
                        ],
                    ],
                ],
            ],
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Builds a conversation consisting of the system prompt and the user
     * prompt, optionally prefixes the user prompt with a <context> block built
     * from the metadata, and sends the request through callAPIWithTools().
     *
     * The context block is only built when $useContext is true and the
     * metadata contains at least one of:
     * - 'template': page ID whose content is used as a starting point
     * - 'examples': list of page IDs whose full content is included
     * - 'snippets': list of text snippets (e.g. from ChromaDB)
     *
     * When $useTools is true the tool definitions from getAvailableTools()
     * are added to the request and the LLM may call them.
     *
     * @param string $command    The command name for loading command-specific system prompts
     * @param string $prompt     The prompt to send to the LLM as user message
     * @param array  $metadata   Optional metadata containing template, examples, and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @param bool   $useTools   Whether to offer the tools to the LLM (default: false)
     * @return string The response content from the LLM
     * @throws Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
    {
        // Tool results for get_template/get_examples depend on the current text,
        // so neither the cache nor the loop counters may leak between requests.
        $this->toolCallCache = [];
        $this->toolCallCounts = [];

        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadSystemPrompt($command, []);

        // Enhance the prompt with context information from metadata
        $hasContext = !empty($metadata['template'])
            || !empty($metadata['examples'])
            || !empty($metadata['snippets']);

        if ($useContext && $hasContext) {
            $contextInfo = "\n\n<context>\n";

            // Add template content if specified in metadata
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Add example pages content if specified in metadata
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                // (array) lets a single page ID given as string behave like a one-element list
                foreach ((array) $metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Add text snippets if specified in metadata
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                // array_values() guarantees numeric indexes for the 1-based example id
                foreach (array_values((array) $metadata['snippets']) as $index => $snippet) {
                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // Prepend the context information to the user prompt
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        // Prepare API request data with model parameters
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt],
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            // Follow the configured setting so the request flag agrees with the
            // '/think' / '/no_think' marker placed into the prompt by process()
            'think' => (bool) $this->think,
        ];

        // Add tools to the request only if useTools is true
        if ($useTools) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Only add sampling parameters that are actually configured
        if ($this->temperature !== null) {
            $data['temperature'] = $this->temperature;
        }
        if ($this->top_p !== null) {
            $data['top_p'] = $this->top_p;
        }
        if ($this->top_k !== null) {
            $data['top_k'] = $this->top_k;
        }
        if ($this->min_p !== null) {
            $data['min_p'] = $this->min_p;
        }

        return $this->callAPIWithTools($data, false);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Processes a tool call made by the LLM and returns the matching tool
     * message. Results are cached by tool name and arguments so that a
     * repeated call within the same request is answered from the cache.
     *
     * The returned array carries an extra 'cached' flag which is meant for
     * internal use only; callAPIWithTools() removes it before the message is
     * sent back to the API.
     *
     * @param array $toolCall The tool call data from the LLM
     * @return array The tool response message (role, tool_call_id, content, cached)
     */
    private function handleToolCall($toolCall)
    {
        $toolName = isset($toolCall['function']['name']) ? $toolCall['function']['name'] : '';
        $toolCallId = isset($toolCall['id']) ? $toolCall['id'] : null;

        // Arguments normally arrive as a JSON string, but some backends already
        // deliver a decoded structure. Anything undecodable becomes "no arguments".
        $arguments = isset($toolCall['function']['arguments']) ? $toolCall['function']['arguments'] : null;
        if (is_string($arguments)) {
            $arguments = json_decode($arguments, true);
        }
        if (!is_array($arguments)) {
            $arguments = [];
        }

        // Create a cache key from the tool name and arguments
        $cacheKey = md5($toolName . serialize($arguments));

        // Answer from the cache when the same call was already made
        if (isset($this->toolCallCache[$cacheKey])) {
            $toolResponse = $this->toolCallCache[$cacheKey];
            // The call ID differs per call, so it is never part of the cache entry
            $toolResponse['tool_call_id'] = $toolCallId;
            $toolResponse['cached'] = true;
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCallId,
            'cached' => false,
        ];

        switch ($toolName) {
            case 'get_document':
                // NOTE(review): getPageContent() reads the page file directly and does
                // not perform an ACL check - confirm that exposing arbitrary page IDs
                // to the model is acceptable before enabling tools.
                $documentId = isset($arguments['id']) ? (string) $arguments['id'] : '';
                $content = $documentId !== '' ? $this->getPageContent($documentId) : false;
                if ($content === false) {
                    $toolResponse['content'] = 'Document not found: ' . $documentId;
                } else {
                    $toolResponse['content'] = $content;
                }
                break;

            case 'get_template':
                // NOTE(review): the advertised 'type' argument is currently not used;
                // the template is chosen from the current text only.
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                // Keep the count inside the 1-20 range promised in the tool description
                $count = isset($arguments['count']) ? (int) $arguments['count'] : 5;
                $count = max(1, min(20, $count));
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        // Cache the result without the per-call fields
        $cacheEntry = $toolResponse;
        unset($cacheEntry['tool_call_id'], $cacheEntry['cached']);
        $this->toolCallCache[$cacheKey] = $cacheEntry;

        return $toolResponse;
    }

    /**
     * Send a request to the LLM API and resolve tool calls
     *
     * Posts $data as JSON to the configured endpoint. When the response
     * contains tool calls, they are executed through handleToolCall(), their
     * results are appended to the conversation and the request is repeated
     * until the LLM returns final content.
     *
     * Loop protection works by:
     * - Tracking individual tool call counts (tools are disabled once a tool was called more than 3 times)
     * - Tracking total tool calls (tools are disabled once more than 10 calls were made)
     * - Removing the tool definitions from the follow-up request when a limit is
     *   exceeded and ignoring tool calls that are returned after that
     *
     * @param array $data        The API request data including messages with tool responses
     * @param bool  $toolsCalled Whether tools must be disabled for this request (used for loop protection)
     * @param bool  $useTools    Whether to process tool calls at all
     * @return string The final response content
     * @throws Exception If the request fails or the response has an unexpected format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // Set up HTTP headers, including authentication if API key is configured
        $headers = ['Content-Type: application/json'];
        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // Once tools are disabled every tool related option has to go; some APIs
        // reject 'parallel_tool_calls' when no tools are present.
        if ($toolsCalled) {
            unset($data['tools'], $data['tool_choice'], $data['parallel_tool_calls']);
        }

        $payload = json_encode($data);
        if ($payload === false) {
            throw new Exception('API request failed: ' . json_last_error_msg());
        }

        // Initialize and configure cURL for the API request
        $ch = curl_init();
        if ($ch === false) {
            throw new Exception('API request failed: unable to initialize cURL');
        }
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // An unset timeout becomes 0, which cURL treats as "no timeout"
        curl_setopt($ch, CURLOPT_TIMEOUT, (int) $this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        // Execute the API request
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Handle cURL errors
        if ($error) {
            throw new Exception('API request failed: ' . $error);
        }

        // Handle HTTP errors
        if ($httpCode !== 200) {
            throw new Exception('API request failed with HTTP code: ' . $httpCode);
        }

        // Parse and validate the JSON response
        $result = json_decode($response, true);
        if (!is_array($result) || !isset($result['choices'][0]['message']) || !is_array($result['choices'][0]['message'])) {
            throw new Exception('Unexpected API response format');
        }
        $message = $result['choices'][0]['message'];

        $hasContent = isset($message['content']);
        $content = $hasContent ? trim($message['content']) : '';

        // Tool calls are only honoured while tools are still enabled; this also
        // stops a backend that keeps returning tool calls from recursing forever.
        $toolCalls = [];
        if ($useTools && !$toolsCalled && !empty($message['tool_calls']) && is_array($message['tool_calls'])) {
            $toolCalls = $message['tool_calls'];
        }

        // Return the content when there is some. Empty content accompanied by
        // tool calls is a tool request, not a final answer.
        if ($hasContent && ($content !== '' || empty($toolCalls))) {
            $this->toolCallCounts = [];
            return $content;
        }

        if (!empty($toolCalls)) {
            // Start with original messages
            $messages = $data['messages'];

            // Add assistant's message with tool calls, keeping all original
            // fields except for the (empty) content
            $assistantMessage = $message;
            unset($assistantMessage['content']);
            $messages[] = $assistantMessage;

            // Process each tool call and track counts to prevent infinite loops
            foreach ($toolCalls as $toolCall) {
                $toolName = isset($toolCall['function']['name']) ? $toolCall['function']['name'] : '';
                if (!isset($this->toolCallCounts[$toolName])) {
                    $this->toolCallCounts[$toolName] = 0;
                }
                $this->toolCallCounts[$toolName]++;

                $toolResponse = $this->handleToolCall($toolCall);
                // 'cached' is bookkeeping only and not part of the message schema
                unset($toolResponse['cached']);
                $messages[] = $toolResponse;
            }

            // Disable tools when a single tool was called more than 3 times ...
            $totalCalls = 0;
            foreach ($this->toolCallCounts as $count) {
                if ($count > 3) {
                    $toolsCalled = true;
                    break;
                }
                $totalCalls += $count;
            }

            // ... or when more than 10 calls were made in total
            if ($totalCalls > 10) {
                $toolsCalled = true;
            }

            // Make another API call with tool responses
            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
        }

        // Neither content nor usable tool calls
        throw new Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:profiles:PROFILE:PROMPT_NAME
     *
     * The method implements a profile fallback mechanism:
     * 1. First tries to load the prompt from the configured profile
     * 2. If not found, falls back to the 'default' profile
     * 3. Throws an exception if neither is available
     *
     * After loading the prompt, it scans for placeholders and fills the ones
     * that were not supplied (or were supplied with a value that cannot be
     * rendered as text) before replacing all placeholders in a single pass.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array  $variables  Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws Exception If the prompt page cannot be loaded from any profile
     */
    private function loadPrompt($promptName, $variables = [])
    {
        // Default to 'default' if profile is not set
        if (empty($this->profile)) {
            $this->profile = 'default';
        }

        // Try the prompt page of the configured profile first
        $promptPageId = 'dokullm:profiles:' . $this->profile . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // If the profile-specific prompt doesn't exist, try default as fallback
        if ($prompt === false && $this->profile !== 'default') {
            $promptPageId = 'dokullm:profiles:default:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        // If still no prompt found, throw an exception
        if ($prompt === false) {
            throw new Exception('Prompt page not found: ' . $promptPageId);
        }

        $pageTemplate = isset($variables['page_template']) ? $variables['page_template'] : null;
        $pageExamples = isset($variables['page_examples']) ? $variables['page_examples'] : [];
        $pagePrevious = isset($variables['page_previous']) ? $variables['page_previous'] : '';

        // Add missing placeholders with appropriate values
        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // Skip values the caller supplied, unless they cannot be rendered as text
            if (isset($variables[$placeholder]) && self::isPlaceholderValue($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    // Content of the template page, or a ChromaDB suggestion
                    $variables[$placeholder] = $this->getTemplateContent($pageTemplate);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->chromaClient !== null ? $this->getSnippets(10) : '( no examples )';
                    break;

                case 'examples':
                    // Content of the example pages given in the metadata
                    $variables[$placeholder] = $this->getExamplesContent($pageExamples);
                    break;

                case 'previous':
                    // Content of the previous report page given in the metadata
                    $variables[$placeholder] = $this->getPreviousContent($pagePrevious);

                    // Make the dates of the current and previous report available too
                    $variables['current_date'] = $this->getPageDate($this->pageId);
                    $variables['previous_date'] = !empty($pagePrevious) ? $this->getPageDate($pagePrevious) : '';
                    break;

                default:
                    // Unknown placeholders are replaced with an empty string
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Replace placeholders of the form {placeholder_name}. strtr() works in a
        // single pass, so placeholder-like text inside an inserted value (e.g. the
        // user's text) is not expanded again.
        $replacements = [];
        foreach ($variables as $name => $value) {
            if ($value === null) {
                $value = '';
            }
            if (!self::isPlaceholderValue($value)) {
                // Arrays such as lists of page IDs have no textual representation
                continue;
            }
            $replacements['{' . $name . '}'] = (string) $value;
        }

        return empty($replacements) ? $prompt : strtr($prompt, $replacements);
    }

    /**
     * Check whether a value can be inserted into a prompt as text
     *
     * @param mixed $value The value to check
     * @return bool True for scalars and objects implementing __toString()
     */
    private static function isPlaceholderValue($value)
    {
        return is_scalar($value) || (is_object($value) && method_exists($value, '__toString'));
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the main system prompt ('system') and appends the command-specific
     * system prompt ('ACTION:system') when such a page exists.
     *
     * @param string $action    The action/command name
     * @param array  $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws Exception If the main system prompt cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadPrompt('system', $variables);

        if (empty($action)) {
            return $systemPrompt;
        }

        try {
            $systemPrompt .= "\n" . $this->loadPrompt($action . ':system', $variables);
        } catch (Exception $e) {
            // The command-specific system prompt is optional; a missing page
            // simply leaves the main system prompt unchanged.
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Retrieves the raw content of a DokuWiki page by its ID. The page file is
     * read directly from disk.
     *
     * NOTE(review): no ACL check is performed here - callers are presumably
     * expected to pass only page IDs the current user may read; confirm.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // Convert page ID to file path
        $pageFile = wikiFN($pageId);

        if (!file_exists($pageFile) || !is_readable($pageFile)) {
            return false;
        }

        return file_get_contents($pageFile);
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Looks for a stand-alone group of six digits (YYmmdd) in the page ID that
     * forms a valid calendar date. If none is found, the last modification
     * time of the page file is used.
     *
     * @param string|null $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD) or empty string if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        // Use provided page ID or current page ID
        $targetPageId = $pageId ?: $this->pageId;
        if (empty($targetPageId)) {
            return '';
        }

        // The lookarounds keep the pattern from matching the first six digits of
        // a longer number (e.g. a YYYYmmdd date or a numeric identifier)
        if (preg_match('/(?<!\d)(\d{2})(\d{2})(\d{2})(?!\d)/', $targetPageId, $matches)) {
            // Assume 20xx for years 00-69, 19xx for years 70-99
            $fullYear = (intval($matches[1]) <= 69 ? '20' : '19') . $matches[1];
            $month = $matches[2];
            $day = $matches[3];

            if (checkdate((int) $month, (int) $day, (int) $fullYear)) {
                return $fullYear . '-' . $month . '-' . $day;
            }
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            return date('Y-m-d', filemtime($pageFile));
        }

        // Return empty string if no date can be determined
        return '';
    }

    /**
     * Get current text
     *
     * Retrieves the text stored by process() or processCustomPrompt().
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns a list of unique placeholder names in order of first appearance.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text
     */
    public function findPlaceholders($text)
    {
        if (!preg_match_all('/\{([^}]+)\}/', $text, $matches)) {
            return [];
        }

        // array_unique() keeps the original keys; re-index to get a proper list
        return array_values(array_unique($matches[1]));
    }

    /**
     * Get template content for the current text
     *
     * If a page ID is provided and the page is readable, its content is
     * returned. Otherwise ChromaDB is queried for a template matching the
     * current text.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content or '( no template )' if none is available
     */
    private function getTemplateContent($pageId = null)
    {
        // If pageId is provided, use it directly
        if (!empty($pageId)) {
            $templateContent = $this->getPageContent($pageId);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        // Without ChromaDB there is nothing to suggest a template
        if ($this->chromaClient === null) {
            return '( no template )';
        }

        // Otherwise, get template suggestion for the current text
        $templateIds = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($templateIds)) {
            $templateContent = $this->getPageContent($templateIds[0]);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Queries ChromaDB for snippets relevant to the current text and returns
     * them wrapped in <example> elements.
     *
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets content or '( no examples )' if none are available
     */
    private function getSnippets($count = 10)
    {
        // Without ChromaDB there are no snippets
        if ($this->chromaClient === null) {
            return '( no examples )';
        }

        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            $formattedSnippets[] = '<example id="' . ($index + 1) . "\">\n" . $snippet . "\n</example>";
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Returns the content of each readable page wrapped in an <example_page>
     * element.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples content or '( no examples )' if none are available
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content !== false) {
                $examplesContent[] = '<example_page source="' . $exampleId . "\">\n" . $content . "\n</example_page>";
            }
        }

        // Same marker as for "no IDs given" when none of the pages could be read
        return empty($examplesContent) ? '( no examples )' : implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * @param string $previousId Previous page ID
     * @return string Previous report content or a marker text if not available
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);

        return $content !== false ? $content : '( previous report not found )';
    }

    /**
     * Get ChromaDB client and collection name
     *
     * The collection name is the first namespace part of the current page ID.
     * Without a page ID the collection 'reports' is used.
     *
     * @return array Array containing the ChromaDB client and collection name
     * @throws Exception If no ChromaDB client is available
     */
    private function getChromaDBClient()
    {
        // A client cannot be created here; it has to be passed to the constructor
        if ($this->chromaClient === null) {
            throw new Exception('No ChromaDB client available');
        }

        $chromaCollection = 'reports';

        if (!empty($this->pageId)) {
            // Split the page ID by ':' and take the first part as collection name
            $parts = explode(':', $this->pageId);
            if (!empty($parts[0])) {
                // NOTE(review): pages in 'playground' get an empty collection name;
                // presumably the ChromaDB client maps that to its own default
                // collection - confirm, or use 'reports' here as well.
                $chromaCollection = $parts[0] === 'playground' ? '' : $parts[0];
            }
        }

        return [$this->chromaClient, $chromaCollection];
    }

    /**
     * Run a similarity query and return one column of the first result row
     *
     * Shared implementation of queryChromaDB() and queryChromaDBSnippets().
     * Failures are logged and reported as "no results" so that a ChromaDB
     * problem never aborts the LLM request.
     *
     * @param string     $text      The text to find similar documents for
     * @param int        $limit     Maximum number of documents to retrieve
     * @param array|null $where     Optional filter conditions for metadata
     * @param string     $resultKey Result column to return ('ids' or 'documents')
     * @return array List of values of the requested column
     */
    private function fetchChromaDBResults($text, $limit, $where, $resultKey)
    {
        try {
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            if (isset($results[$resultKey][0]) && is_array($results[$resultKey][0])) {
                return array_values($results[$resultKey][0]);
            }

            return [];
        } catch (Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Queries the collection derived from the current page ID for documents
     * similar to the given text.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs as stored in ChromaDB
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        return $this->fetchChromaDBResults($text, $limit, $where, 'ids');
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Same as queryChromaDB() but returns the document texts instead of the
     * document IDs.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        return $this->fetchChromaDBResults($text, $limit, $where, 'documents');
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries ChromaDB for the single best matching document whose metadata
     * field 'type' equals 'template'.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }
}