<?php

namespace dokuwiki\plugin\dokullm;

use Exception;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}

/**
 * LLM Client class for handling API communications
 *
 * Holds the connection and sampling settings handed to the constructor and
 * turns an action name plus a text into a chat-completion request.
 * Prompts are loaded from wiki pages, placeholders are filled in, and the
 * request is sent with cURL. Tool calls returned by the model are answered
 * locally; their results are cached per instance and capped by a loop guard.
 */
class LlmClient
{
    /** @var string The API endpoint URL */
    private $api_url;

    /** @var array Cache for tool call results, keyed by md5(tool name + serialized arguments) */
    private $toolCallCache = [];

    /** @var string Current text for tool usage (set by process()) */
    private $currentText = '';

    /** @var array Per-tool call counters used to prevent infinite tool loops */
    private $toolCallCounts = [];

    /** @var string The API authentication key */
    private $api_key;

    /** @var string The model identifier to use */
    private $model;

    /** @var int The request timeout in seconds */
    private $timeout;

    /** @var float The temperature setting for response randomness */
    private $temperature;

    /** @var float The top-p setting for nucleus sampling */
    private $top_p;

    /** @var int The top-k setting for token selection */
    private $top_k;

    /** @var float The min-p setting for minimum probability threshold */
    private $min_p;

    /** @var bool Whether to enable thinking in LLM responses */
    private $think;

    /** @var string|null Profile name used to locate prompt pages ('default' when empty) */
    private $profile;

    /** @var object|null ChromaDB client instance */
    private $chromaClient;

    /** @var string|null Page ID */
    private $pageId;

    /**
     * Initialize the LLM client with the given settings
     *
     * All values are supplied by the caller; nothing is read from the
     * DokuWiki configuration here. Sampling parameters left at null are
     * simply omitted from the API request.
     *
     * @param string|null $api_url      The LLM API endpoint URL
     * @param string|null $api_key      Authentication key for the API (optional)
     * @param string|null $model        The model identifier to use for requests
     * @param int|null    $timeout      Request timeout in seconds
     * @param float|null  $temperature  Temperature setting for response randomness
     * @param float|null  $top_p        Top-p (nucleus sampling) setting
     * @param int|null    $top_k        Top-k setting
     * @param float|null  $min_p        Minimum probability threshold
     * @param bool|null   $think        Whether to enable thinking in LLM responses
     * @param string|null $profile      Profile for prompt templates
     * @param object|null $chromaClient ChromaDB client instance (optional)
     * @param string|null $pageId       Page ID (optional)
     */
    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $profile = null, $chromaClient = null, $pageId = null)
    {
        $this->api_url = $api_url;
        $this->api_key = $api_key;
        $this->model = $model;
        $this->timeout = $timeout;
        $this->temperature = $temperature;
        $this->top_p = $top_p;
        $this->top_k = $top_k;
        $this->min_p = $min_p;
        $this->think = $think;
        $this->profile = $profile;
        $this->chromaClient = $chromaClient;
        $this->pageId = $pageId;
    }

    /**
     * Run an action on a text
     *
     * Stores the text for later tool usage, adds 'text', 'think' and
     * 'action' to the metadata, renames the 'template', 'examples' and
     * 'previous' metadata keys to 'page_template', 'page_examples' and
     * 'page_previous', loads the prompt page named after the action and
     * sends the request.
     *
     * @param string $action     The action name; also the name of the prompt page to load
     * @param string $text       The text to process
     * @param array  $metadata   Optional metadata (template, examples, previous, ...)
     * @param bool   $useContext Whether callAPI() may prepend a context block to the prompt
     * @return string The response content from the LLM
     * @throws Exception If the prompt cannot be loaded or the API request fails
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Add text, think and action to metadata
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Page references are moved to 'page_*' keys so that the bare names
        // stay free for the resolved content placeholders in loadPrompt()
        foreach (['template', 'examples', 'previous'] as $key) {
            if (isset($metadata[$key])) {
                $metadata['page_' . $key] = $metadata[$key];
                unset($metadata[$key]);
            }
        }

        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing.
     * The names must match the cases handled in handleToolCall().
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                            ]
                        ],
                        'required' => ['id']
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'type' => [
                                'type' => 'string',
                                'description' => 'The type of the template (e.g., "mri" for MRI reports, "daily" for daily reports).',
                                'default' => ''
                            ]
                        ]
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5
                            ]
                        ]
                    ]
                ]
            ]
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Builds a two-message conversation (system prompt + user prompt),
     * optionally prepends a <context> block to the user prompt, adds the
     * configured sampling parameters and hands the request over to
     * callAPIWithTools().
     *
     * The context block can contain:
     * - Template content: the page named by $metadata['template']
     * - Example pages: the pages listed in $metadata['examples']
     * - Text snippets: the strings in $metadata['snippets']
     *
     * NOTE(review): process() is the only caller in this file and renames
     * 'template' and 'examples' to 'page_template' and 'page_examples'
     * before calling, so only the 'snippets' branch is reachable through
     * it. The other branches are kept unchanged - confirm whether they
     * should read the 'page_*' keys instead.
     *
     * @param string $command    The command name for loading command-specific system prompts
     * @param string $prompt     The prompt to send to the LLM as user message
     * @param array  $metadata   Optional metadata containing template, examples, and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @param bool   $useTools   Whether to offer the tools from getAvailableTools() to the model (default: false)
     * @return string The response content from the LLM
     * @throws Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadSystemPrompt($command, []);

        // Enhance the prompt with context information from metadata
        // This provides the LLM with additional context about templates and examples
        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
            $contextInfo = "\n\n<context>\n";

            // Add template content if specified in metadata
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Add example pages content if specified in metadata
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                foreach ($metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Add text snippets if specified in metadata
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                foreach ($metadata['snippets'] as $index => $snippet) {
                    // These are text snippets from ChromaDB
                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // Prepend the context block to the user prompt (not the system prompt)
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        // Prepare API request data with model parameters
        // NOTE(review): 'think' is always sent as true, independent of
        // $this->think (which only drives the '/think' vs '/no_think'
        // prompt variable) - confirm this is intended.
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt]
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            'think' => true
        ];

        // Add tools to the request only if useTools is true
        if ($useTools) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Only add sampling parameters that were actually configured
        $sampling = [
            'temperature' => $this->temperature,
            'top_p' => $this->top_p,
            'top_k' => $this->top_k,
            'min_p' => $this->min_p,
        ];
        foreach ($sampling as $name => $value) {
            if ($value !== null) {
                $data[$name] = $value;
            }
        }

        // Send the request; tool calls (if any) are resolved recursively
        return $this->callAPIWithTools($data, false);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Executes one tool call and returns the corresponding 'tool' message.
     * Results are cached per instance, keyed by tool name and arguments,
     * so a repeated identical call is answered from the cache.
     *
     * The returned message carries an extra 'cached' flag telling whether
     * the content came from the cache.
     *
     * @param array $toolCall The tool call data from the LLM
     * @return array The tool response message
     */
    private function handleToolCall($toolCall)
    {
        $toolName = $toolCall['function']['name'];
        $arguments = json_decode($toolCall['function']['arguments'], true);
        // Malformed or empty argument JSON decodes to null/scalar; treat it as "no arguments"
        if (!is_array($arguments)) {
            $arguments = [];
        }

        // Create a cache key from the tool name and arguments
        $cacheKey = md5($toolName . serialize($arguments));

        // Check if we have a cached result for this tool call
        if (isset($this->toolCallCache[$cacheKey])) {
            $toolResponse = $this->toolCallCache[$cacheKey];
            // The call ID differs per request, so it is never stored in the cache
            $toolResponse['tool_call_id'] = $toolCall['id'];
            $toolResponse['cached'] = true;
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCall['id'],
            'cached' => false
        ];

        switch ($toolName) {
            case 'get_document':
                if (!isset($arguments['id']) || $arguments['id'] === '') {
                    // 'id' is declared as required, but the model may still omit it
                    $toolResponse['content'] = 'Missing required argument: id';
                    break;
                }
                $documentId = $arguments['id'];
                $content = $this->getPageContent($documentId);
                if ($content === false) {
                    $toolResponse['content'] = 'Document not found: ' . $documentId;
                } else {
                    $toolResponse['content'] = $content;
                }
                break;

            case 'get_template':
                // The optional 'type' argument is currently not used for the lookup
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
                // Keep the count inside the 1-20 range advertised in the tool definition
                $count = max(1, min(20, $count));
                // Double quotes are required here so that \n is a real newline
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        // Cache the result for future calls with the same parameters,
        // without the fields that change per call
        $cacheEntry = $toolResponse;
        unset($cacheEntry['tool_call_id']);
        unset($cacheEntry['cached']);
        $this->toolCallCache[$cacheKey] = $cacheEntry;

        return $toolResponse;
    }

    /**
     * Send a request to the API and resolve tool calls
     *
     * Posts $data as JSON to the configured endpoint. If the reply contains
     * text content it is returned (trimmed). If the reply contains tool
     * calls instead, each call is answered via handleToolCall(), the
     * assistant message and the tool responses are appended to the
     * conversation, and the method calls itself again.
     *
     * Loop protection:
     * - once any single tool has been called more than 3 times, or
     * - once more than 10 tool calls were made in total,
     * the next request is sent without tools so the model has to answer.
     * The counters are reset when final content is returned.
     *
     * @param array $data        The API request data including messages with tool responses
     * @param bool  $toolsCalled When true, the tool definitions are stripped from the request
     * @param bool  $useTools    Whether tool calls in the response are processed at all
     * @return string The final response content
     * @throws Exception On cURL errors, non-200 HTTP status or an unexpected response format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // Set up HTTP headers, including authentication if API key is configured
        $headers = [
            'Content-Type: application/json'
        ];

        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // Once the loop guard has tripped, drop every tool-related field so
        // the model cannot request further tool calls
        if ($toolsCalled) {
            unset($data['tools']);
            unset($data['tool_choice']);
            unset($data['parallel_tool_calls']);
        }

        // Initialize and configure cURL for the API request
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        if ($this->timeout !== null) {
            // Without a configured timeout cURL's default (no limit) applies
            curl_setopt($ch, CURLOPT_TIMEOUT, (int)$this->timeout);
        }
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        // Execute the API request
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Handle cURL errors
        if ($error) {
            throw new Exception('API request failed: ' . $error);
        }

        // Handle HTTP errors
        if ($httpCode !== 200) {
            throw new Exception('API request failed with HTTP code: ' . $httpCode);
        }

        // Parse the JSON response; anything without a message is rejected below
        $result = json_decode($response, true);
        $message = isset($result['choices'][0]['message']) && is_array($result['choices'][0]['message'])
            ? $result['choices'][0]['message']
            : [];

        $content = isset($message['content']) && is_string($message['content']) ? $message['content'] : null;
        $hasToolCalls = $useTools
            && isset($message['tool_calls'])
            && is_array($message['tool_calls'])
            && !empty($message['tool_calls']);

        // Text content is the final answer. An empty string only counts as
        // an answer when there are no tool calls to process: some backends
        // send content "" (instead of null) together with tool_calls.
        if ($content !== null && (!$hasToolCalls || trim($content) !== '')) {
            // Reset tool call counts when we get final content
            $this->toolCallCounts = [];
            return trim($content);
        }

        if ($hasToolCalls) {
            // Start with original messages
            $messages = $data['messages'];

            // Echo the assistant message back with all original fields except 'content'
            $assistantMessage = $message;
            unset($assistantMessage['content']);
            $messages[] = $assistantMessage;

            // Answer each tool call and count it for the loop guard
            foreach ($message['tool_calls'] as $toolCall) {
                $toolName = $toolCall['function']['name'];
                if (!isset($this->toolCallCounts[$toolName])) {
                    $this->toolCallCounts[$toolName] = 0;
                }
                $this->toolCallCounts[$toolName]++;

                $messages[] = $this->handleToolCall($toolCall);
            }

            // Disable tools when one tool was called more than 3 times or
            // more than 10 calls were made in total
            if (max($this->toolCallCounts) > 3 || array_sum($this->toolCallCounts) > 10) {
                $toolsCalled = true;
            }

            // Make another API call with tool responses
            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
        }

        // Throw exception for unexpected response format
        throw new Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:profiles:PROFILE:PROMPT_NAME
     *
     * Profile fallback:
     * 1. First tries to load the prompt from the configured profile
     * 2. If not found, falls back to the 'default' profile
     * 3. Throws an exception if neither is available
     *
     * After loading, the prompt is scanned for {placeholders}. Placeholders
     * without a value in $variables are filled in: 'template', 'snippets',
     * 'examples' and 'previous' are resolved to content, everything else
     * becomes an empty string. Resolving 'previous' also sets
     * 'current_date' and 'previous_date'.
     *
     * Side effect: an empty profile is replaced by 'default' on the instance.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array  $variables  Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws Exception If the prompt page cannot be loaded from any profile
     */
    private function loadPrompt($promptName, $variables = [])
    {
        // Default to 'default' if profile is not set
        if (empty($this->profile)) {
            $this->profile = 'default';
        }

        // Try the configured profile first
        $promptPageId = 'dokullm:profiles:' . $this->profile . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // If the profile-specific prompt doesn't exist, try default as fallback
        // (same 'dokullm:profiles:' namespace as the primary lookup)
        if ($prompt === false && $this->profile !== 'default') {
            $promptPageId = 'dokullm:profiles:default:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        // If still no prompt found, throw an exception
        if ($prompt === false) {
            throw new Exception('Prompt page not found: ' . $promptPageId);
        }

        // Page references may be absent (e.g. for system prompts, which are
        // loaded with an empty variable set)
        $pageTemplate = isset($variables['page_template']) ? $variables['page_template'] : null;
        $pageExamples = isset($variables['page_examples']) ? $variables['page_examples'] : [];
        $pagePrevious = isset($variables['page_previous']) ? $variables['page_previous'] : '';

        // Add missing placeholders with appropriate values
        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // Skip if already provided in variables
            if (isset($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    // Explicit template page if given, otherwise a ChromaDB suggestion
                    $variables[$placeholder] = $this->getTemplateContent($pageTemplate);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->chromaClient !== null ? $this->getSnippets(10) : '( no examples )';
                    break;

                case 'examples':
                    // Content of the example pages listed in the metadata
                    $variables[$placeholder] = $this->getExamplesContent($pageExamples);
                    break;

                case 'previous':
                    // Content of the previous report page, if any
                    $variables[$placeholder] = $this->getPreviousContent($pagePrevious);

                    // Add current and previous dates to metadata
                    $variables['current_date'] = $this->getPageDate($this->pageId);
                    $variables['previous_date'] = !empty($pagePrevious) ?
                        $this->getPageDate($pagePrevious) :
                        '';
                    break;

                default:
                    // Unknown placeholders (including an unset 'prompt') are blanked out
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Replace placeholders with actual values
        // Placeholders are in the format {placeholder_name}
        foreach ($variables as $placeholder => $value) {
            // List-valued metadata (e.g. page_examples) has no text form
            if (is_array($value)) {
                continue;
            }
            $prompt = str_replace('{' . $placeholder . '}', (string)$value, $prompt);
        }

        return $prompt;
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the 'system' prompt and, when an action is given, appends the
     * prompt '<action>:system' on a new line if that page exists.
     *
     * @param string $action    The action/command name
     * @param array  $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws Exception If the main system prompt cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadPrompt('system', $variables);

        if (empty($action)) {
            return $systemPrompt;
        }

        try {
            $systemPrompt .= "\n" . $this->loadPrompt($action . ':system', $variables);
        } catch (Exception $e) {
            // The command-specific system prompt is optional: when it cannot
            // be loaded, the main system prompt is used on its own
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Resolves the page ID to a file with wikiFN() and returns the raw
     * file content.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // Convert page ID to file path
        $pageFile = wikiFN($pageId);

        if (!file_exists($pageFile) || !is_readable($pageFile)) {
            return false;
        }

        return file_get_contents($pageFile);
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Takes the first run of six digits in the page ID as YYmmdd. If the
     * ID contains none, the modification time of the page file is used.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD), or '' if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        // Use provided page ID or current page ID
        $targetPageId = $pageId ?: $this->pageId;

        // Neither an explicit nor a current page: nothing to derive a date from
        if (empty($targetPageId)) {
            return '';
        }

        // Try to extract date from page ID (looking for YYmmdd pattern)
        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
            list(, $year, $month, $day) = $matches;

            // Assume 20xx for years 00-69, 19xx for years 70-99
            $century = intval($year) <= 69 ? '20' : '19';

            return $century . $year . '-' . $month . '-' . $day;
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            return date('Y-m-d', filemtime($pageFile));
        }

        // Return empty string if no date can be determined
        return '';
    }

    /**
     * Get current text
     *
     * Retrieves the current text stored from the process function.
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns the unique placeholder names. Keys of the returned array
     * are those left by array_unique() and may have gaps.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text
     */
    public function findPlaceholders($text)
    {
        if (!preg_match_all('/\{([^}]+)\}/', $text, $matches)) {
            return [];
        }

        return array_unique($matches[1]);
    }

    /**
     * Get template content for the current text
     *
     * If a pageId is provided and readable, its content is returned.
     * Otherwise ChromaDB (when available) is queried for a template that
     * matches the current text.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content, or '( no template )' if none is found
     */
    private function getTemplateContent($pageId = null)
    {
        // If pageId is provided, use it directly
        if ($pageId !== null) {
            $templateContent = $this->getPageContent($pageId);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        // Without ChromaDB there is no way to suggest a template
        if ($this->chromaClient === null) {
            return '( no template )';
        }

        // Otherwise, get template suggestion for the current text
        $templateIds = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($templateIds)) {
            $templateContent = $this->getPageContent($templateIds[0]);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Queries ChromaDB for snippets similar to the current text and wraps
     * each one in an <example id="N"> element (N starts at 1).
     *
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets, or '( no examples )' if none are available
     */
    private function getSnippets($count = 10)
    {
        // If ChromaDB is disabled, there are no snippets
        if ($this->chromaClient === null) {
            return '( no examples )';
        }

        // Get example snippets for the current text
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            // Double quotes are required here so that \n is a real newline
            $formattedSnippets[] = "<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>";
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Returns the content of each readable page wrapped in an
     * <example_page source="ID"> element. Unreadable pages are skipped.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples content; '( no examples )' when no IDs
     *                are given, '' when none of the pages could be read
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content !== false) {
                // Double quotes are required here so that \n is a real newline
                $examplesContent[] = "<example_page source=\"" . $exampleId . "\">\n" . $content . "\n</example_page>";
            }
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * @param string $previousId Previous page ID
     * @return string Previous report content, '( no previous report )' when no ID
     *                is given, or '( previous report not found )' when the page
     *                cannot be read
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);

        return $content !== false ? $content : '( previous report not found )';
    }

    /**
     * Get ChromaDB client and collection name
     *
     * The collection name is the first namespace part of the current page
     * ID, and 'reports' when there is no page ID.
     *
     * NOTE(review): for pages in the 'playground' namespace the collection
     * name is set to an empty string; the original comment calls this "the
     * default collection" - confirm how the ChromaDB client treats ''.
     *
     * @return array Array containing the ChromaDB client and collection name
     * @throws Exception If no ChromaDB client is available
     */
    private function getChromaDBClient()
    {
        // A client can only come from the constructor; none can be created here
        if ($this->chromaClient === null) {
            throw new Exception('No ChromaDB client available');
        }

        $chromaCollection = 'reports';

        if (!empty($this->pageId)) {
            // Split the page ID by ':' and take the first part as collection name
            $parts = explode(':', $this->pageId);
            if (!empty($parts[0])) {
                $chromaCollection = $parts[0] === 'playground' ? '' : $parts[0];
            }
        }

        return [$this->chromaClient, $chromaCollection];
    }

    /**
     * Run a similarity query and return one column of the first result row
     *
     * Shared implementation of queryChromaDB() and queryChromaDBSnippets().
     * Failures are logged and reported as an empty list so that a broken
     * ChromaDB never aborts prompt processing.
     *
     * @param string     $text      The text to find similar documents for
     * @param int        $limit     Maximum number of documents to retrieve
     * @param array|null $where     Optional filter conditions for metadata
     * @param string     $resultKey Result field to extract ('ids' or 'documents')
     * @return array List of values taken from $results[$resultKey][0]
     */
    private function runChromaQuery($text, $limit, $where, $resultKey)
    {
        try {
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            if (isset($results[$resultKey][0]) && is_array($results[$resultKey][0])) {
                return array_values($results[$resultKey][0]);
            }

            return [];
        } catch (Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Queries the collection derived from the current page ID for documents
     * similar to the text and returns their ChromaDB IDs unchanged.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs (empty on failure)
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        return $this->runChromaQuery($text, $limit, $where, 'ids');
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Same query as queryChromaDB(), but returns the document texts
     * instead of the document IDs.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets (empty on failure)
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        return $this->runChromaQuery($text, $limit, $where, 'documents');
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries for the single best match among documents whose metadata has
     * 'type' => 'template' and strips a trailing chunk suffix from its ID.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }
}