<?php

namespace dokuwiki\plugin\dokullm;

use Exception;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}

/**
 * LLM Client class for handling API communications
 *
 * Holds the connection and sampling settings handed to the constructor and
 * provides methods for various text processing operations through an LLM API.
 * Implements caching for tool calls to avoid duplicate processing.
 */
class LlmClient
{
    /** @var string The API endpoint URL */
    private $api_url;

    /** @var array Cache for tool call results, keyed by md5(tool name + serialized arguments) */
    private $toolCallCache = [];

    /** @var string Current text for tool usage */
    private $currentText = '';

    /** @var array Track tool call counts (tool name => count) to prevent infinite loops */
    private $toolCallCounts = [];

    /** @var string The API authentication key */
    private $api_key;

    /** @var string The model identifier to use */
    private $model;

    /** @var int The request timeout in seconds */
    private $timeout;

    /** @var float The temperature setting for response randomness */
    private $temperature;

    /** @var float The top-p setting for nucleus sampling */
    private $top_p;

    /** @var int The top-k setting for token selection */
    private $top_k;

    /** @var float The min-p setting for minimum probability threshold */
    private $min_p;

    /** @var bool Whether to enable thinking in LLM responses */
    private $think;

    /** @var string|null Language code used to locate prompt pages ('default' or empty means 'en') */
    private $language;

    /** @var object|null ChromaDB client instance */
    private $chromaClient;

    /**
     * Initialize the LLM client with configuration settings
     *
     * All settings are supplied by the caller; this class does not read the
     * DokuWiki configuration itself.
     *
     * @param string|null $api_url      The LLM API endpoint URL
     * @param string|null $api_key      Authentication key for the API (optional)
     * @param string|null $model        The model identifier to use for requests
     * @param int|null    $timeout      Request timeout in seconds
     * @param float|null  $temperature  Temperature setting; omitted from the request when null
     * @param float|null  $top_p        Top-p (nucleus sampling) setting; omitted when null
     * @param int|null    $top_k        Top-k setting; omitted when null
     * @param float|null  $min_p        Minimum probability threshold; omitted when null
     * @param bool|null   $think        Whether to enable thinking in LLM responses
     * @param string|null $language     Language code for prompt templates
     * @param object|null $chromaClient ChromaDB client instance (optional)
     */
    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $language = null, $chromaClient = null)
    {
        $this->api_url = $api_url;
        $this->api_key = $api_key;
        $this->model = $model;
        $this->timeout = $timeout;
        $this->temperature = $temperature;
        $this->top_p = $top_p;
        $this->top_k = $top_k;
        $this->min_p = $min_p;
        $this->think = $think;
        $this->language = $language;
        $this->chromaClient = $chromaClient;
    }

    /**
     * Process text with a named action prompt
     *
     * Loads the prompt page for the action, fills its placeholders from the
     * metadata and sends the result to the LLM.
     *
     * The metadata keys 'template', 'examples' and 'previous' are renamed to
     * 'page_template', 'page_examples' and 'page_previous' so that the prompt
     * placeholders {template}, {examples} and {previous} can be filled with
     * page *content* rather than page IDs.
     *
     * @param string $action     The action name (also the prompt page name)
     * @param string $text       The text to process
     * @param array  $metadata   Optional metadata (template, examples, previous, ...)
     * @param bool   $useContext Whether callAPI may prepend a context block (default: true)
     * @return string The processed text
     * @throws Exception If the prompt cannot be loaded or the API request fails
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Add text, think and action to metadata
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // If we have 'template' in metadata, move it to 'page_template'
        if (isset($metadata['template'])) {
            $metadata['page_template'] = $metadata['template'];
            unset($metadata['template']);
        }

        // If we have 'examples' in metadata, move it to 'page_examples'
        if (isset($metadata['examples'])) {
            $metadata['page_examples'] = $metadata['examples'];
            unset($metadata['examples']);
        }

        // If we have 'previous' in metadata, move it to 'page_previous'
        if (isset($metadata['previous'])) {
            $metadata['page_previous'] = $metadata['previous'];
            unset($metadata['previous']);
        }

        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends the custom prompt found in $metadata['prompt'] to the LLM along
     * with the provided text.
     *
     * @param string $text       The text to process
     * @param array  $metadata   Metadata; 'prompt' holds the custom prompt, and
     *                           'template', 'examples', 'snippets' feed the context block
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     * @throws Exception If the system prompt cannot be loaded or the API request fails
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Format the prompt with the text and custom prompt.
        // A missing 'prompt' key is treated as an empty custom prompt.
        $prompt = ($metadata['prompt'] ?? '') . "\n\nText to process:\n" . $text;

        return $this->callAPI('custom', $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing.
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                            ]
                        ],
                        'required' => ['id']
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'language' => [
                                'type' => 'string',
                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
                                'default' => 'ro'
                            ]
                        ]
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5
                            ]
                        ]
                    ]
                ]
            ]
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Builds a two-message conversation (system + user), optionally prepends a
     * <context> block to the user prompt, adds the configured sampling
     * parameters and hands the request to callAPIWithTools().
     *
     * The context block is built from these metadata keys:
     * - 'template': page ID whose content is used as a starting point
     * - 'examples': list of page IDs whose full content is included
     * - 'snippets': list of text snippets (from ChromaDB)
     *
     * Note: process() renames 'template' and 'examples' to 'page_template' and
     * 'page_examples' before calling this method, so on that path only
     * 'snippets' can trigger the context block.
     *
     * @param string $command    The command name for loading command-specific system prompts
     * @param string $prompt     The prompt to send to the LLM as user message
     * @param array  $metadata   Optional metadata containing template, examples, and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @param bool   $useTools   Whether to advertise the tools from getAvailableTools() (default: false)
     * @return string The response content from the LLM
     * @throws Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadSystemPrompt($command, []);

        // Enhance the prompt with context information from metadata
        // This provides the LLM with additional context about templates and examples
        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
            $contextInfo = "\n\n<context>\n";

            // Add template content if specified in metadata
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Add example pages content if specified in metadata
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                // (array) lets a single page ID given as a string work too
                foreach ((array)$metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Add text snippets if specified in metadata
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                // array_values() guarantees integer indexes for the 1-based example id
                foreach (array_values((array)$metadata['snippets']) as $index => $snippet) {
                    // These are text snippets from ChromaDB
                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // Prepend context information to the user prompt
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        // Prepare API request data with model parameters
        // NOTE(review): 'think' is hard-coded to true here and does not follow
        // $this->think (which only drives the '/think' prompt placeholder) - confirm this is intended.
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt]
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            'think' => true
        ];

        // Add tools to the request only if useTools is true
        if ($useTools) {
            // Define available tools
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Only add parameters if they are defined and not null
        if ($this->temperature !== null) {
            $data['temperature'] = $this->temperature;
        }
        if ($this->top_p !== null) {
            $data['top_p'] = $this->top_p;
        }
        if ($this->top_k !== null) {
            $data['top_k'] = $this->top_k;
        }
        if ($this->min_p !== null) {
            $data['min_p'] = $this->min_p;
        }

        // Send the request; tool calls in the reply (if any) are resolved there
        return $this->callAPIWithTools($data, false);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Processes tool calls made by the LLM and returns appropriate responses.
     * Implements caching to avoid duplicate calls with identical parameters.
     *
     * @param array $toolCall The tool call data from the LLM
     * @return array The tool response message ('role', 'tool_call_id', 'cached', 'content')
     */
    private function handleToolCall($toolCall)
    {
        $toolName = $toolCall['function']['name'];
        $toolCallId = $toolCall['id'] ?? null;

        // Arguments normally arrive as a JSON string; tolerate an already decoded
        // array and fall back to "no arguments" when decoding fails.
        $rawArguments = $toolCall['function']['arguments'] ?? [];
        $arguments = is_string($rawArguments) ? json_decode($rawArguments, true) : $rawArguments;
        if (!is_array($arguments)) {
            $arguments = [];
        }

        // Create a cache key from the tool name and arguments
        $cacheKey = md5($toolName . serialize($arguments));

        // Check if we have a cached result for this tool call
        if (isset($this->toolCallCache[$cacheKey])) {
            // Return cached result and indicate it was found in cache
            $toolResponse = $this->toolCallCache[$cacheKey];
            // Update with current tool call ID
            $toolResponse['tool_call_id'] = $toolCallId;
            $toolResponse['cached'] = true; // Indicate this response was cached
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCallId,
            'cached' => false // Indicate this is a fresh response
        ];

        switch ($toolName) {
            case 'get_document':
                $documentId = isset($arguments['id']) ? (string)$arguments['id'] : '';
                // Without an ID there is nothing to look up
                $content = $documentId === '' ? false : $this->getPageContent($documentId);
                if ($content === false) {
                    $toolResponse['content'] = 'Document not found: ' . $documentId;
                } else {
                    $toolResponse['content'] = $content;
                }
                break;

            case 'get_template':
                // Get template content using the convenience function
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                // Get examples content using the convenience function.
                // The tool definition advertises a 1-20 range, so enforce it here.
                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
                $count = max(1, min(20, $count));
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        // Cache the result for future calls with the same parameters
        $cacheEntry = $toolResponse;
        // Remove tool_call_id and cached flag from cache as they change per call
        unset($cacheEntry['tool_call_id']);
        unset($cacheEntry['cached']);
        $this->toolCallCache[$cacheKey] = $cacheEntry;

        return $toolResponse;
    }

    /**
     * Send a request to the LLM API and resolve tool calls
     *
     * Performs the HTTP POST. If the reply contains message content it is
     * returned (trimmed). If it contains tool calls instead, each call is
     * answered via handleToolCall(), the answers are appended to the
     * conversation and the method calls itself again.
     *
     * Loop protection works by:
     * - Tracking individual tool call counts (more than 3 for one tool trips the limit)
     * - Tracking total tool calls (more than 10 in total trips the limit)
     * - Removing 'tools' and 'tool_choice' from the next request once a limit is tripped
     *
     * @param array $data        The API request data including messages with tool responses
     * @param bool  $toolsCalled Whether tools must be stripped from the request (loop protection)
     * @param bool  $useTools    Whether to process tool calls found in the response
     * @return string The final response content
     * @throws Exception On cURL errors, non-200 HTTP status or an unexpected response format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // Set up HTTP headers, including authentication if API key is configured
        $headers = [
            'Content-Type: application/json'
        ];

        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
        if ($toolsCalled) {
            unset($data['tools']);
            unset($data['tool_choice']);
        }

        // Initialize and configure cURL for the API request
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        // Execute the API request
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Handle cURL errors
        if ($error) {
            throw new Exception('API request failed: ' . $error);
        }

        // Handle HTTP errors
        if ($httpCode !== 200) {
            throw new Exception('API request failed with HTTP code: ' . $httpCode);
        }

        // Parse the JSON response; invalid JSON yields null and ends in the
        // "unexpected format" exception below.
        $result = json_decode($response, true);

        // Extract the content from the response if available
        if (isset($result['choices'][0]['message']['content'])) {
            $content = trim($result['choices'][0]['message']['content']);
            // Reset tool call counts when we get final content
            $this->toolCallCounts = [];
            return $content;
        }

        // Handle tool calls if present
        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
            $toolCalls = $result['choices'][0]['message']['tool_calls'];
            // Start with original messages
            $messages = $data['messages'];
            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
            $assistantMessage = [];
            foreach ($result['choices'][0]['message'] as $key => $value) {
                if ($key !== 'content') {
                    $assistantMessage[$key] = $value;
                }
            }
            // Add assistant's message with tool calls
            $messages[] = $assistantMessage;

            // Process each tool call and track counts to prevent infinite loops
            foreach ($toolCalls as $toolCall) {
                $toolName = $toolCall['function']['name'];
                // Increment tool call count
                if (!isset($this->toolCallCounts[$toolName])) {
                    $this->toolCallCounts[$toolName] = 0;
                }
                $this->toolCallCounts[$toolName]++;

                // NOTE(review): the response carries the extra 'cached' key into the
                // outgoing messages - confirm the target API tolerates unknown fields.
                $toolResponse = $this->handleToolCall($toolCall);
                $messages[] = $toolResponse;
            }

            // Check if any tool has been called more than 3 times
            $toolsCalledCount = 0;
            foreach ($this->toolCallCounts as $count) {
                if ($count > 3) {
                    // If any tool called more than 3 times, disable tools to break loop
                    $toolsCalled = true;
                    break;
                }
                $toolsCalledCount += $count;
            }

            // If total tool calls exceed 10, also disable tools
            if ($toolsCalledCount > 10) {
                $toolsCalled = true;
            }

            // Make another API call with tool responses
            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
        }

        // Throw exception for unexpected response format
        throw new Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:prompts:LANGUAGE:PROMPT_NAME
     *
     * The method implements a language fallback mechanism:
     * 1. First tries to load the prompt in the configured language
     * 2. If not found, falls back to English prompts
     * 3. Throws an exception if neither is available
     *
     * After loading the prompt, it scans for placeholders and automatically
     * adds missing ones with appropriate values before replacing all placeholders.
     * Array-valued variables (such as the page ID list in 'page_examples') are
     * not substituted, because they have no string form.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array  $variables  Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws Exception If the prompt page cannot be loaded in any language
     */
    private function loadPrompt($promptName, $variables = [])
    {
        // Default to 'en' if language is 'default' or not set
        if ($this->language === 'default' || empty($this->language)) {
            $this->language = 'en';
        }

        // Construct the page ID for the prompt in the configured language
        $promptPageId = 'dokullm:prompts:' . $this->language . ':' . $promptName;

        // Try to get the content of the prompt page in the configured language
        $prompt = $this->getPageContent($promptPageId);

        // If the language-specific prompt doesn't exist, try English as fallback
        if ($prompt === false && $this->language !== 'en') {
            $promptPageId = 'dokullm:prompts:en:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        // If still no prompt found, throw an exception
        if ($prompt === false) {
            throw new Exception('Prompt page not found: ' . $promptPageId);
        }

        // Find placeholders in the prompt
        $placeholders = $this->findPlaceholders($prompt);

        // Add missing placeholders with appropriate values
        foreach ($placeholders as $placeholder) {
            // Skip if already provided in variables
            if (isset($variables[$placeholder])) {
                continue;
            }

            // Add appropriate values for specific placeholders
            switch ($placeholder) {
                case 'template':
                    // Use the page_template page if given, otherwise let ChromaDB suggest one
                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template'] ?? null);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->getSnippets(10);
                    break;

                case 'examples':
                    // If we have example page IDs in metadata, add examples content
                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples'] ?? []);
                    break;

                case 'previous':
                    // If we have a previous report page ID in metadata, add previous content
                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous'] ?? '');

                    // Add current and previous dates to metadata
                    $variables['current_date'] = $this->getPageDate();
                    $variables['previous_date'] = !empty($variables['page_previous']) ?
                        $this->getPageDate($variables['page_previous']) :
                        '';
                    break;

                default:
                    // For other placeholders, leave them empty or set a default value
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Replace placeholders with actual values
        // Placeholders are in the format {placeholder_name}
        foreach ($variables as $placeholder => $value) {
            // str_replace() rejects an array replacement for a string search
            // (TypeError on PHP 8), so values without a string form are skipped.
            if (is_array($value) || (is_object($value) && !method_exists($value, '__toString'))) {
                continue;
            }
            $prompt = str_replace('{' . $placeholder . '}', (string)$value, $prompt);
        }

        // Return the processed prompt
        return $prompt;
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the main system prompt ('system') and appends the command-specific
     * system prompt ('ACTION:system') if that page exists.
     *
     * @param string $action    The action/command name
     * @param array  $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws Exception If the main system prompt page cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadPrompt('system', $variables);

        // Check if there's a command-specific system prompt appendage
        if (!empty($action)) {
            try {
                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
                if ($commandSystemPrompt !== false) {
                    $systemPrompt .= "\n" . $commandSystemPrompt;
                }
            } catch (Exception $e) {
                // Ignore exceptions when loading command-specific system prompt
                // This allows the main system prompt to still be used
            }
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Retrieves the raw content of a DokuWiki page by its ID.
     * Used for loading prompt, template and example page content.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // Convert page ID to file path
        $pageFile = wikiFN($pageId);

        // Check if file exists and is readable
        if (file_exists($pageFile) && is_readable($pageFile)) {
            return file_get_contents($pageFile);
        }

        return false;
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Attempts to extract a date in YYmmdd format from the page ID.
     * If not found, uses the file's last modification timestamp.
     *
     * NOTE(review): the pattern matches the first run of six digits anywhere in
     * the ID and does not validate month/day - confirm page IDs never contain
     * other digit runs (e.g. eight-digit dates).
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD), or '' if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        global $ID;

        // Use provided page ID or current page ID
        $targetPageId = (string)($pageId ?: $ID);

        // Try to extract date from page ID (looking for YYmmdd pattern)
        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
            // Convert YYmmdd to YYYY-MM-DD
            $year = $matches[1];
            $month = $matches[2];
            $day = $matches[3];

            // Assume 20xx for years 00-69, 19xx for years 70-99
            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;

            return $fullYear . '-' . $month . '-' . $day;
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            $timestamp = filemtime($pageFile);
            return date('Y-m-d', $timestamp);
        }

        // Return empty string if no date can be determined
        return '';
    }

    /**
     * Get current text
     *
     * Retrieves the text stored by process() or processCustomPrompt().
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns an array of unique placeholder names.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text
     */
    public function findPlaceholders($text)
    {
        $placeholders = [];
        $pattern = '/\{([^}]+)\}/';

        if (preg_match_all($pattern, $text, $matches)) {
            // Get unique placeholder names
            $placeholders = array_unique($matches[1]);
        }

        return $placeholders;
    }

    /**
     * Get template content for the current text
     *
     * Convenience function to retrieve template content. If a pageId is provided,
     * retrieves content directly from that page. If that page cannot be read, or
     * no pageId is given, queries ChromaDB for a relevant template based on the
     * current text.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content, or '( no template )' if none is found
     */
    private function getTemplateContent($pageId = null)
    {
        // If pageId is provided, use it directly
        if ($pageId !== null) {
            $templateContent = $this->getPageContent($pageId);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        // Otherwise, get template suggestion for the current text
        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($pageId)) {
            $templateContent = $this->getPageContent($pageId[0]);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }
        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Convenience function to retrieve relevant snippets for the current text.
     * Queries ChromaDB for relevant snippets and returns them wrapped in
     * numbered <example> elements, one per snippet.
     *
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets content, or '( no examples )' if none are found
     */
    private function getSnippets($count = 10)
    {
        // Get example snippets for the current text
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (!empty($snippets)) {
            $formattedSnippets = [];
            foreach ($snippets as $index => $snippet) {
                // Double quotes so that \n is a real line break, not a literal backslash-n
                $formattedSnippets[] = "<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>";
            }
            return implode("\n", $formattedSnippets);
        }
        return '( no examples )';
    }

    /**
     * Get examples content from example page IDs
     *
     * Convenience function to retrieve content from example pages.
     * Returns the content of each readable page packed in XML elements.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples content; '( no examples )' if no IDs were
     *                given; empty string if none of the given pages could be read
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content !== false) {
                // Double quotes so that \n is a real line break, not a literal backslash-n
                $examplesContent[] = "<example_page source=\"" . $exampleId . "\">\n" . $content . "\n</example_page>";
            }
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * Convenience function to retrieve content from a previous report page.
     * Returns the content of the previous page or a default message if not found.
     *
     * @param string $previousId Previous page ID
     * @return string Previous report content or default message if not found
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);
        if ($content !== false) {
            return $content;
        }

        return '( previous report not found )';
    }

    /**
     * Get ChromaDB client with collection name
     *
     * Returns the ChromaDB client passed to the constructor together with the
     * collection name. The collection is the first namespace segment of the
     * current page ID; 'documents' is used when there is no page ID or when the
     * first segment is 'playground'.
     *
     * @return array Array containing the ChromaDB client and collection name
     * @throws Exception If no ChromaDB client is available
     */
    private function getChromaDBClient()
    {
        // If we have a ChromaDB client passed in constructor, use it
        if ($this->chromaClient !== null) {
            // Get the collection name based on the current page ID
            $chromaDefaultCollection = 'documents'; // Default fallback
            global $ID;
            $chromaCollection = $chromaDefaultCollection;

            if (!empty($ID)) {
                // Split the page ID by ':' and take the first part as collection name
                $parts = explode(':', $ID);
                if (isset($parts[0]) && !empty($parts[0])) {
                    // If the first part is 'playground', use the default collection
                    // Otherwise, use the first part as the collection name
                    if ($parts[0] === 'playground') {
                        $chromaCollection = $chromaDefaultCollection;
                    } else {
                        $chromaCollection = $parts[0];
                    }
                }
            }

            return [$this->chromaClient, $chromaCollection];
        }

        // If we don't have a ChromaDB client, we can't create one here
        // because getConf() is not available in this context
        throw new Exception('No ChromaDB client available');
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Queries the collection chosen by getChromaDBClient() for documents similar
     * to the input text. Failures are logged and result in an empty list.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Extract document IDs from results
            $documentIds = [];
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                foreach ($results['ids'][0] as $id) {
                    // Use the ChromaDB ID directly without conversion
                    $documentIds[] = $id;
                }
            }

            return $documentIds;
        } catch (Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Same query as queryChromaDB(), but returns the matching document texts
     * instead of document IDs. Failures are logged and result in an empty list.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Extract document texts from results
            $snippets = [];
            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                foreach ($results['documents'][0] as $document) {
                    $snippets[] = $document;
                }
            }

            return $snippets;
        } catch (Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries ChromaDB for the single best match for the input text among
     * documents whose metadata has 'type' set to 'template'.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1), chunk suffix removed
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }
}