<?php

namespace dokuwiki\plugin\dokullm;

use Exception;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}

/**
 * LLM Client class for handling API communications
 *
 * Holds the connection and sampling settings passed to the constructor and
 * exposes process() / processCustomPrompt() as entry points. Prompts are
 * loaded from wiki pages, placeholders are filled in, and the request is sent
 * with cURL. Tool-call results are cached per instance so identical calls are
 * not executed twice.
 */
class LlmClient
{
    /** @var string The API endpoint URL */
    private $api_url;

    /** @var array Cache for tool call results, keyed by md5(tool name + serialized arguments) */
    private $toolCallCache = [];

    /** @var string Current text for tool usage */
    private $currentText = '';

    /** @var array Track tool call counts (tool name => count) to prevent infinite loops */
    private $toolCallCounts = [];

    /** @var string The API authentication key */
    private $api_key;

    /** @var string The model identifier to use */
    private $model;

    /** @var int The request timeout in seconds */
    private $timeout;

    /** @var float The temperature setting for response randomness */
    private $temperature;

    /** @var float The top-p setting for nucleus sampling */
    private $top_p;

    /** @var int The top-k setting for token selection */
    private $top_k;

    /** @var float The min-p setting for minimum probability threshold */
    private $min_p;

    /** @var bool Whether to enable thinking in LLM responses */
    private $think;

    /** @var string|null Profile (namespace segment) used to look up prompt pages */
    private $profile;

    /** @var object|null ChromaDB client instance */
    private $chromaClient;

    /** @var string|null Page ID */
    private $pageId;

    /** @var bool Whether ChromaDB lookups (templates, snippets) are enabled */
    private $enableChromaDB = false;

    /**
     * Initialize the LLM client with explicit settings
     *
     * All values are supplied by the caller; nothing is read from the
     * DokuWiki configuration here.
     *
     * @param string|null $api_url        The LLM API endpoint URL
     * @param string|null $api_key        Authentication key for the API (optional)
     * @param string|null $model          The model identifier to use for requests
     * @param int|null    $timeout        Request timeout in seconds
     * @param float|null  $temperature    Temperature setting (omitted from the request when null)
     * @param float|null  $top_p          Top-p setting (omitted from the request when null)
     * @param int|null    $top_k          Top-k setting (omitted from the request when null)
     * @param float|null  $min_p          Min-p setting (omitted from the request when null)
     * @param bool|null   $think          Whether to enable thinking in LLM responses
     * @param string|null $profile        Profile for prompt templates ('default' when empty)
     * @param object|null $chromaClient   ChromaDB client instance (optional)
     * @param string|null $pageId         Page ID (optional)
     * @param bool|null   $enableChromaDB Whether ChromaDB lookups are enabled (null means false)
     */
    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $profile = null, $chromaClient = null, $pageId = null, $enableChromaDB = null)
    {
        $this->api_url = $api_url;
        $this->api_key = $api_key;
        $this->model = $model;
        $this->timeout = $timeout;
        $this->temperature = $temperature;
        $this->top_p = $top_p;
        $this->top_k = $top_k;
        $this->min_p = $min_p;
        $this->think = $think;
        $this->profile = $profile;
        $this->chromaClient = $chromaClient;
        $this->pageId = $pageId;
        $this->enableChromaDB = $enableChromaDB ?? false;
    }

    /**
     * Process text with the prompt page belonging to an action
     *
     * Stores the text for later tool usage, adds 'text', 'think' and 'action'
     * to the metadata, renames the 'template', 'examples' and 'previous'
     * metadata keys to 'page_template', 'page_examples' and 'page_previous',
     * loads the prompt page named after the action and sends it to the API.
     *
     * Because of the renaming, the context block built in callAPI() can only
     * be triggered by a 'snippets' key for requests made through this method.
     *
     * @param string $action     The action name; also the name of the prompt page
     * @param string $text       The text to process
     * @param array  $metadata   Optional metadata used to fill prompt placeholders
     * @param bool   $useContext Whether to include context from metadata (default: true)
     * @return string The response content from the LLM
     * @throws Exception If the prompt cannot be loaded or the API request fails
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Add text, think and action to metadata
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Move page references to 'page_*' keys so that the plain names stay
        // free for the resolved content of the corresponding placeholders
        foreach (['template', 'examples', 'previous'] as $key) {
            if (isset($metadata[$key])) {
                $metadata['page_' . $key] = $metadata[$key];
                unset($metadata[$key]);
            }
        }

        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends the prompt found in $metadata['prompt'] to the LLM along with the
     * provided text. A missing 'prompt' entry is treated as an empty prompt.
     *
     * @param string $text       The text to process
     * @param array  $metadata   Metadata containing 'prompt' and optionally template, examples and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     * @throws Exception If the system prompt cannot be loaded or the API request fails
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Format the prompt with the text and custom prompt
        $customPrompt = $metadata['prompt'] ?? '';
        $prompt = $customPrompt . "\n\nText to process:\n" . $text;

        return $this->callAPI('custom', $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing.
     * The names listed here are the ones dispatched in handleToolCall().
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                            ]
                        ],
                        'required' => ['id']
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'type' => [
                                'type' => 'string',
                                'description' => 'The type of the template (e.g., "mri" for MRI reports, "daily" for daily reports).',
                                'default' => ''
                            ]
                        ]
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5
                            ]
                        ]
                    ]
                ]
            ]
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Builds a two-message conversation (system prompt + user prompt) and
     * hands it to callAPIWithTools(). When $useContext is true and the
     * metadata contains 'template', 'examples' or 'snippets', a <context>
     * block is prepended to the user prompt. Sampling parameters are only
     * added to the request when they were configured (not null).
     *
     * @param string $command    The command name for loading command-specific system prompts
     * @param string $prompt     The prompt to send to the LLM as user message
     * @param array  $metadata   Optional metadata containing template, examples, and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @param bool   $useTools   Whether to offer the tools from getAvailableTools() to the model (default: false)
     * @return string The response content from the LLM
     * @throws Exception If a prompt page is missing, the API request fails or returns an unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
    {
        // Counters left over from a request that ended with an exception must
        // not count against this one
        $this->toolCallCounts = [];

        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadSystemPrompt($command, []);

        // Enhance the prompt with context information from metadata
        $hasContext = !empty($metadata['template'])
            || !empty($metadata['examples'])
            || !empty($metadata['snippets']);
        if ($useContext && $hasContext) {
            // The context goes in front of the user prompt, not into the system prompt
            $prompt = $this->buildContextInfo($metadata) . "\n\n" . $prompt;
        }

        // Prepare API request data with model parameters
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt]
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            // NOTE(review): hard-coded; the configured $this->think only reaches
            // prompts through the {think} placeholder - confirm this is intended
            'think' => true
        ];

        // Add tools to the request only if useTools is true
        if ($useTools) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Only add sampling parameters that are defined and not null
        $sampling = [
            'temperature' => $this->temperature,
            'top_p' => $this->top_p,
            'top_k' => $this->top_k,
            'min_p' => $this->min_p,
        ];
        foreach ($sampling as $name => $value) {
            if ($value !== null) {
                $data[$name] = $value;
            }
        }

        return $this->callAPIWithTools($data, false);
    }

    /**
     * Build the <context> block that is prepended to the user prompt
     *
     * Uses the 'template' (page ID), 'examples' (page IDs) and 'snippets'
     * (texts) entries of the metadata. Pages that cannot be read are skipped;
     * the surrounding <context> element is emitted regardless.
     *
     * @param array $metadata Metadata passed to callAPI()
     * @return string The context block
     */
    private function buildContextInfo(array $metadata)
    {
        $contextInfo = "\n\n<context>\n";

        // Add template content if specified in metadata
        if (!empty($metadata['template'])) {
            $templateContent = $this->getPageContent($metadata['template']);
            if ($templateContent !== false) {
                $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
            }
        }

        // Add example pages content if specified in metadata
        if (!empty($metadata['examples'])) {
            $examplesContent = [];
            // Cast so that a single page ID given as string is handled as well
            foreach ((array)$metadata['examples'] as $example) {
                $content = $this->getPageContent($example);
                if ($content !== false) {
                    $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                }
            }
            if (!empty($examplesContent)) {
                $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
            }
        }

        // Add text snippets if specified in metadata
        if (!empty($metadata['snippets'])) {
            $snippetsContent = [];
            foreach ((array)$metadata['snippets'] as $index => $snippet) {
                // These are text snippets from ChromaDB
                $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
            }
            if (!empty($snippetsContent)) {
                $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
            }
        }

        return $contextInfo . "\n</context>\n";
    }

    /**
     * Handle tool calls from the LLM
     *
     * Executes one tool call and returns the 'tool' message for it. Results
     * are cached by tool name and arguments; a cached result is returned with
     * the current tool_call_id and 'cached' set to true.
     *
     * @param array $toolCall The tool call data from the LLM
     * @return array The tool response message (role, tool_call_id, cached, content)
     */
    private function handleToolCall($toolCall)
    {
        $toolName = $toolCall['function']['name'];
        $toolCallId = $toolCall['id'] ?? null;

        // Arguments normally arrive as a JSON string; accept an already decoded
        // array too and fall back to no arguments when decoding fails
        $arguments = $toolCall['function']['arguments'] ?? [];
        if (is_string($arguments)) {
            $arguments = json_decode($arguments, true);
        }
        if (!is_array($arguments)) {
            $arguments = [];
        }

        // Create a cache key from the tool name and arguments
        $cacheKey = md5($toolName . serialize($arguments));

        // Check if we have a cached result for this tool call
        if (isset($this->toolCallCache[$cacheKey])) {
            $toolResponse = $this->toolCallCache[$cacheKey];
            // Update with current tool call ID
            $toolResponse['tool_call_id'] = $toolCallId;
            $toolResponse['cached'] = true; // Indicate this response was cached
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCallId,
            'cached' => false // Indicate this is a fresh response
        ];

        switch ($toolName) {
            case 'get_document':
                $documentId = $arguments['id'] ?? '';
                if ($documentId === '' || !is_string($documentId)) {
                    $toolResponse['content'] = 'Missing required argument: id';
                    break;
                }
                $content = $this->getPageContent($documentId);
                if ($content === false) {
                    $toolResponse['content'] = 'Document not found: ' . $documentId;
                } else {
                    $toolResponse['content'] = $content;
                }
                break;

            case 'get_template':
                // Get template content using the convenience function
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                // Keep the count inside the 1-20 range advertised in the tool definition
                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
                $count = max(1, min(20, $count));
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        // Cache the result for future calls with the same parameters
        $cacheEntry = $toolResponse;
        // Remove tool_call_id and cached flag from cache as they change per call
        unset($cacheEntry['tool_call_id'], $cacheEntry['cached']);
        $this->toolCallCache[$cacheKey] = $cacheEntry;

        return $toolResponse;
    }

    /**
     * Send a request to the API and resolve tool calls until content is returned
     *
     * If the response contains tool calls (and tools are still allowed), each
     * call is executed through handleToolCall(), the results are appended to
     * the conversation and the request is repeated.
     *
     * Loop protection works by:
     * - Tracking individual tool call counts (max 3 per tool)
     * - Tracking total tool calls (max 10 total)
     * - Removing the tools from the next request once a limit is exceeded;
     *   a response that still contains only tool calls then raises an exception
     *
     * @param array $data        The API request data including messages with tool responses
     * @param bool  $toolsCalled Whether the tool limits have been reached (tools are stripped from the request)
     * @param bool  $useTools    Whether to process tool calls
     * @return string The final response content
     * @throws Exception If the request fails or the response has an unexpected format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // Once the limits are reached, stop offering tools to break potential loops
        if ($toolsCalled) {
            unset($data['tools'], $data['tool_choice'], $data['parallel_tool_calls']);
        }

        $result = $this->sendRequest($data);
        $message = $result['choices'][0]['message'] ?? null;
        if (!is_array($message)) {
            throw new Exception('Unexpected API response format');
        }

        // Tool calls are checked before content because a response may carry an
        // empty content string together with tool calls
        if ($useTools && !$toolsCalled && !empty($message['tool_calls'])) {
            // Add assistant's message with tool calls, keeping all original fields except for content
            $assistantMessage = $message;
            unset($assistantMessage['content']);

            $messages = $data['messages'];
            $messages[] = $assistantMessage;

            // Process each tool call and track counts to prevent infinite loops
            foreach ($message['tool_calls'] as $toolCall) {
                $toolName = $toolCall['function']['name'];
                $this->toolCallCounts[$toolName] = ($this->toolCallCounts[$toolName] ?? 0) + 1;
                $messages[] = $this->handleToolCall($toolCall);
            }

            // Disable tools when one tool was called more than 3 times or all
            // tools together more than 10 times
            if (max($this->toolCallCounts) > 3 || array_sum($this->toolCallCounts) > 10) {
                $toolsCalled = true;
            }

            // Make another API call with tool responses
            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
        }

        // Extract the content from the response if available
        if (isset($message['content'])) {
            // Reset tool call counts when we get final content
            $this->toolCallCounts = [];
            return trim($message['content']);
        }

        // Throw exception for unexpected response format
        throw new Exception('Unexpected API response format');
    }

    /**
     * POST the request data to the configured endpoint and decode the response
     *
     * @param array $data The API request data
     * @return mixed The JSON-decoded response (null when the body is not valid JSON)
     * @throws Exception If encoding fails, cURL reports an error or the HTTP status is not 200
     */
    private function sendRequest(array $data)
    {
        // Set up HTTP headers, including authentication if API key is configured
        $headers = [
            'Content-Type: application/json'
        ];
        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        $payload = json_encode($data);
        if ($payload === false) {
            throw new Exception('API request failed: ' . json_last_error_msg());
        }

        // Initialize and configure cURL for the API request
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, (int)$this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        // Execute the API request
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Handle cURL errors
        if ($response === false || $error) {
            throw new Exception('API request failed: ' . $error);
        }

        // Handle HTTP errors
        if ($httpCode !== 200) {
            throw new Exception('API request failed with HTTP code: ' . $httpCode);
        }

        return json_decode($response, true);
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:profiles:PROFILE:PROMPT_NAME
     *
     * The method implements a profile fallback mechanism:
     * 1. First tries to load the prompt from the configured profile
     * 2. If not found, falls back to the 'default' profile
     * 3. Throws an exception if neither is available
     *
     * After loading the prompt, it scans for placeholders and fills in the
     * ones not supplied by the caller ('template', 'snippets', 'examples' and
     * 'previous' get computed content, anything else becomes an empty string)
     * before replacing all placeholders.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array  $variables  Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws Exception If the prompt page cannot be loaded from any profile
     */
    private function loadPrompt($promptName, $variables = [])
    {
        // Default to 'default' if profile is not set
        if (empty($this->profile)) {
            $this->profile = 'default';
        }

        // Try to get the content of the prompt page in the configured profile
        $promptPageId = 'dokullm:profiles:' . $this->profile . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // If the profile-specific prompt doesn't exist, try default as fallback
        if ($prompt === false && $this->profile !== 'default') {
            $promptPageId = 'dokullm:profiles:default:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        // If still no prompt found, throw an exception
        if ($prompt === false) {
            throw new Exception('Prompt page not found: ' . $promptPageId);
        }

        // Add missing placeholders with appropriate values
        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // Skip if already provided in variables
            if (isset($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    // Use the page_template from the variables when there is one
                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template'] ?? null);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->enableChromaDB ? $this->getSnippets(10) : '( no examples )';
                    break;

                case 'examples':
                    // Content of the example pages listed in page_examples, if any
                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples'] ?? []);
                    break;

                case 'previous':
                    // Content of the previous report page, if any
                    $previousPageId = $variables['page_previous'] ?? '';
                    $variables[$placeholder] = $this->getPreviousContent($previousPageId);

                    // Add current and previous dates as well
                    $variables['current_date'] = $this->getPageDate($this->pageId);
                    $variables['previous_date'] = !empty($previousPageId)
                        ? $this->getPageDate($previousPageId)
                        : '';
                    break;

                default:
                    // Unknown placeholders are replaced by an empty string
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Replace placeholders with actual values
        // Placeholders are in the format {placeholder_name}
        foreach ($variables as $placeholder => $value) {
            // Values such as the page_examples list cannot be rendered as text
            if (is_array($value) || (is_object($value) && !method_exists($value, '__toString'))) {
                continue;
            }
            $prompt = str_replace('{' . $placeholder . '}', (string)$value, $prompt);
        }

        return $prompt;
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the 'system' prompt and appends the '<action>:system' prompt,
     * separated by a newline, when such a page can be loaded.
     *
     * @param string $action    The action/command name
     * @param array  $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws Exception If the main system prompt cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadPrompt('system', $variables);

        // Check if there's a command-specific system prompt appendage
        if (!empty($action)) {
            try {
                $systemPrompt .= "\n" . $this->loadPrompt($action . ':system', $variables);
            } catch (Exception $e) {
                // A missing command-specific prompt is not an error:
                // the main system prompt is used on its own
            }
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Retrieves the raw content of a DokuWiki page by its ID.
     * Used for loading prompt, template and example page content.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // Convert page ID to file path
        $pageFile = wikiFN($pageId);

        // Check if file exists and is readable
        if (file_exists($pageFile) && is_readable($pageFile)) {
            return file_get_contents($pageFile);
        }

        return false;
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Looks for the first run of six digits in the page ID and reads it as
     * YYmmdd. If there is none, or it is not a valid calendar date, the
     * file's last modification timestamp is used instead.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD) or empty string if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        // Use provided page ID or current page ID
        $targetPageId = $pageId ?: $this->pageId;
        if (empty($targetPageId)) {
            return '';
        }

        // Try to extract date from page ID (looking for YYmmdd pattern)
        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
            list(, $year, $month, $day) = $matches;

            // Assume 20xx for years 00-69, 19xx for years 70-99
            $fullYear = (intval($year) <= 69 ? '20' : '19') . $year;

            // Six digits that do not form a real date are not a date
            if (checkdate((int)$month, (int)$day, (int)$fullYear)) {
                return $fullYear . '-' . $month . '-' . $day;
            }
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            return date('Y-m-d', filemtime($pageFile));
        }

        // Return empty string if no date can be determined
        return '';
    }

    /**
     * Get current text
     *
     * Retrieves the text stored by process() or processCustomPrompt().
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns the unique placeholder names. Keys are the positions of the
     * first occurrences and are therefore not necessarily consecutive.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text
     */
    public function findPlaceholders($text)
    {
        if (preg_match_all('/\{([^}]+)\}/', $text, $matches)) {
            return array_unique($matches[1]);
        }

        return [];
    }

    /**
     * Get template content for the current text
     *
     * If a pageId is provided and readable, its content is returned.
     * Otherwise, when ChromaDB is enabled, ChromaDB is queried for a template
     * matching the current text.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content or '( no template )' if none is available
     */
    private function getTemplateContent($pageId = null)
    {
        // If pageId is provided, use it directly
        if ($pageId !== null) {
            $templateContent = $this->getPageContent($pageId);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        // Without ChromaDB there is nothing else to look up
        if (!$this->enableChromaDB) {
            return '( no template )';
        }

        // Otherwise, get template suggestion for the current text
        $templateIds = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($templateIds)) {
            $templateContent = $this->getPageContent($templateIds[0]);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Queries ChromaDB for snippets similar to the current text and wraps
     * each one in an <example> element.
     *
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets or '( no examples )' if none are available
     */
    private function getSnippets($count = 10)
    {
        // If ChromaDB is disabled, there are no snippets
        if (!$this->enableChromaDB) {
            return '( no examples )';
        }

        // Get example snippets for the current text
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            $formattedSnippets[] = '<example id="' . ($index + 1) . "\">\n" . $snippet . "\n</example>";
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Returns the content of each readable page wrapped in an
     * <example_page> element.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples or '( no examples )' if none are available
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content !== false) {
                $examplesContent[] = '<example_page source="' . $exampleId . "\">\n" . $content . "\n</example_page>";
            }
        }

        if (empty($examplesContent)) {
            return '( no examples )';
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * Returns the content of the previous page or a default message if the
     * ID is empty or the page cannot be read.
     *
     * @param string $previousId Previous page ID
     * @return string Previous report content or default message if not found
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);
        if ($content !== false) {
            return $content;
        }

        return '( previous report not found )';
    }

    /**
     * Get ChromaDB client and collection name
     *
     * Returns the client passed to the constructor together with a collection
     * name derived from the first segment of the current page ID ('reports'
     * when no page ID is set).
     *
     * @return array Array containing the ChromaDB client and collection name
     * @throws Exception If no ChromaDB client is available
     */
    private function getChromaDBClient()
    {
        // A client cannot be created here, it has to come from the constructor
        if ($this->chromaClient === null) {
            throw new Exception('No ChromaDB client available');
        }

        // FIXME (from the original author): collection name handling
        $chromaCollection = 'reports';

        if (!empty($this->pageId)) {
            // Split the page ID by ':' and take the first part as collection name
            $parts = explode(':', $this->pageId);
            if (!empty($parts[0])) {
                // NOTE(review): 'playground' maps to an empty collection name, which
                // presumably makes the client pick its own default - confirm
                $chromaCollection = $parts[0] === 'playground' ? '' : $parts[0];
            }
        }

        return [$this->chromaClient, $chromaCollection];
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Queries the collection returned by getChromaDBClient() for documents
     * similar to the text and returns their IDs. Failures are logged and
     * result in an empty list.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // The ChromaDB IDs are used directly without conversion
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                return array_values($results['ids'][0]);
            }

            return [];
        } catch (Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Same query as queryChromaDB(), but returns the document texts instead
     * of the document IDs. Failures are logged and result in an empty list.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Extract document texts from results
            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                return array_values($results['documents'][0]);
            }

            return [];
        } catch (Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries ChromaDB for a single document similar to the text, filtered by
     * the metadata condition type = 'template'.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }
}