1<?php 2namespace dokuwiki\plugin\dokullm; 3 4/** 5 * LLM Client for the dokullm plugin 6 * 7 * This class provides methods to interact with an LLM API for various 8 * text processing tasks such as completion, rewriting, grammar correction, 9 * summarization, conclusion creation, text analysis, and custom prompts. 10 * 11 * The client handles: 12 * - API configuration and authentication 13 * - Prompt template loading and processing 14 * - Context-aware requests with metadata 15 * - DokuWiki page content retrieval 16 */ 17 18// must be run within Dokuwiki 19if (!defined('DOKU_INC')) { 20 die(); 21} 22 23 24/** 25 * LLM Client class for handling API communications 26 * 27 * Manages configuration settings and provides methods for various 28 * text processing operations through an LLM API. 29 * Implements caching for tool calls to avoid duplicate processing. 30 */ 31class LlmClient 32{ 33 /** @var string The API endpoint URL */ 34 private $api_url; 35 36 /** @var array Cache for tool call results */ 37 private $toolCallCache = []; 38 39 /** @var string Current text for tool usage */ 40 private $currentText = ''; 41 42 /** @var array Track tool call counts to prevent infinite loops */ 43 private $toolCallCounts = []; 44 45 /** @var string The API authentication key */ 46 private $api_key; 47 48 /** @var string The model identifier to use */ 49 private $model; 50 51 /** @var int The request timeout in seconds */ 52 private $timeout; 53 54 /** @var float The temperature setting for response randomness */ 55 private $temperature; 56 57 /** @var float The top-p setting for nucleus sampling */ 58 private $top_p; 59 60 /** @var int The top-k setting for token selection */ 61 private $top_k; 62 63 /** @var float The min-p setting for minimum probability threshold */ 64 private $min_p; 65 66 /** @var bool Whether to enable thinking in the LLM responses */ 67 private $think; 68 69 /** 70 * Initialize the LLM client with configuration settings 71 * 72 * Retrieves configuration 
values from DokuWiki's configuration system
     * for API URL, key, model, timeout, and LLM sampling parameters.
     * A setting that is missing from the configuration is stored as null
     * (false for `think`) instead of raising an "undefined array key" warning.
     *
     * Configuration values:
     * - api_url: The LLM API endpoint URL
     * - api_key: Authentication key for the API (optional)
     * - model: The model identifier to use for requests
     * - timeout: Request timeout in seconds
     * - language: Language code for prompt templates
     * - temperature: Temperature setting for response randomness (0.0-1.0)
     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
     * - top_k: Top-k setting (integer >= 1)
     * - min_p: Minimum probability threshold (0.0-1.0)
     * - think: Whether to enable thinking in LLM responses (boolean)
     */
    public function __construct()
    {
        global $conf;

        // Read the plugin section once and tolerate a partially configured plugin
        $settings = $conf['plugin']['dokullm'] ?? [];

        $this->api_url = $settings['api_url'] ?? null;
        $this->api_key = $settings['api_key'] ?? null;
        $this->model = $settings['model'] ?? null;
        $this->timeout = $settings['timeout'] ?? null;
        // Sampling parameters stay null when unset; callAPI() only sends non-null ones
        $this->temperature = $settings['temperature'] ?? null;
        $this->top_p = $settings['top_p'] ?? null;
        $this->top_k = $settings['top_k'] ?? null;
        $this->min_p = $settings['min_p'] ?? null;
        $this->think = $settings['think'] ?? false;
    }

    /**
     * Run a prompt-template driven action on the given text
     *
     * Stores the text for tool usage, adds `text`, `think` and `action` to the
     * metadata, renames the `template`, `examples` and `previous` metadata keys
     * to `page_template`, `page_examples` and `page_previous`, loads the prompt
     * named after the action and sends it to the API.
     *
     * @param string $action The action name; also the name of the prompt template to load
     * @param string $text The text to process
     * @param array $metadata Optional metadata (used as placeholder values for the prompt)
     * @param bool $useContext Passed through to callAPI() (default: true)
     * @return string The response content from the LLM
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Add text, think and action to metadata
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Page references are renamed to page_* so that the plain names remain
        // available as prompt placeholders that loadPrompt() fills with content
        foreach (['template', 'examples', 'previous'] as $key) {
            if (isset($metadata[$key])) {
                $metadata['page_' . $key] = $metadata[$key];
                unset($metadata[$key]);
            }
        }

        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

    /**
     * Create the provided text using the LLM
     *
     * Loads the 'create' prompt and sends it to the LLM. When tools are
     * disabled in the configuration, the method first calls
     * queryChromaDBTemplate() to pick a template (only if none was given) and
     * queryChromaDBSnippets() to append up to 10 snippets to the metadata.
     * When tools are enabled, these lookups are left to the LLM tool calls.
     *
     * @param string $text The text to create
     * @param array $metadata Optional metadata containing template, examples, and snippets
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @return string The created text
     */
    public function createReport($text, $metadata = [], $useContext = true)
    {
        global $conf;

        // Store the current text for tool usage
        $this->currentText = $text;

        // Check if tools should be used based on configuration
        $useTools = $conf['plugin']['dokullm']['use_tools'] ?? false;

        if (!$useTools) {
            // If no template is defined, use the first ChromaDB suggestion
            if (empty($metadata['template'])) {
                $templateResult = $this->queryChromaDBTemplate($text);
                if (!empty($templateResult)) {
                    $metadata['template'] = $templateResult[0];
                }
            }

            // Append ChromaDB snippets to any snippets supplied by the caller
            $chromaResults = $this->queryChromaDBSnippets($text, 10);
            if (!empty($chromaResults)) {
                $metadata['snippets'] = array_merge($metadata['snippets'] ?? [], $chromaResults);
            }
        }

        $think = $this->think ? '/think' : '/no_think';
        $prompt = $this->loadPrompt('create', ['text' => $text, 'think' => $think]);

        return $this->callAPI('create', $prompt, $metadata, $useContext);
    }

    /**
     * Compare two texts and highlight differences
     *
     * Sends a prompt to the LLM asking it to compare two texts and
     * highlight their similarities and differences.
*
     * The previous text is read from the page named in
     * $metadata['previous_report_page']; it is an empty string when that key
     * is missing or the page cannot be read.
     *
     * @param string $text The current text to compare
     * @param array $metadata Optional metadata containing template, examples, and previous report reference
     * @param bool $useContext Whether to include template and examples in the context (default: false)
     * @return string The comparison results
     */
    public function compareText($text, $metadata = [], $useContext = false)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Load the previous report and its date only when a page is referenced
        $previousPage = !empty($metadata['previous_report_page']) ? $metadata['previous_report_page'] : '';
        $previousText = '';
        $previousDate = '';
        if ($previousPage !== '') {
            $loaded = $this->getPageContent($previousPage);
            $previousText = $loaded === false ? '' : $loaded;
            $previousDate = $this->getPageDate($previousPage);
        }

        $prompt = $this->loadPrompt('compare', [
            'text' => $text,
            'previous_text' => $previousText,
            'current_date' => $this->getPageDate(),
            'previous_date' => $previousDate,
            'think' => $this->think ? '/think' : '/no_think'
        ]);

        return $this->callAPI('compare', $prompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends the custom prompt found in $metadata['prompt'] to the LLM,
     * followed by the provided text. A missing prompt is treated as an
     * empty string.
     *
     * @param string $text The text to process
     * @param array $metadata Metadata; `prompt` holds the custom prompt, other keys may carry template and examples
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Format the prompt with the text and custom prompt
        $customPrompt = $metadata['prompt'] ?? '';
        $prompt = $customPrompt . "\n\nText to process:\n" . $text;

        return $this->callAPI('custom', $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing:
     * get_document, get_template and get_examples.
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                            ]
                        ],
                        'required' => ['id']
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'language' => [
                                'type' => 'string',
                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
                                'default' => 'ro'
                            ]
                        ]
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5
                            ]
                        ]
                    ]
                ]
            ]
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Makes an HTTP POST request to the configured API endpoint with
     * the prompt and other parameters. Handles authentication if an
     * API key is configured.
     *
     * The method constructs a conversation with system and user messages,
     * including context information from metadata when available.
     *
     * Complex logic includes:
     * 1. Loading and enhancing the system prompt with metadata context
     * 2. Building the API request with model parameters
     * 3. Handling authentication with API key if configured
     * 4. Making the HTTP request with proper error handling
     * 5. Parsing and validating the API response
     * 6. Supporting tool usage with automatic tool calling when enabled
     * 7.
Implementing context enhancement with templates, examples, and snippets
     *
     * The context information includes:
     * - Template content: Used as a starting point for the response
     * - Example pages: Full content of specified example pages
     * - Text snippets: Relevant text examples from ChromaDB
     *
     * The context block is prepended to the user prompt (not to the system
     * prompt). The `think` request flag follows the configured setting.
     *
     * When tools are enabled, the method supports automatic tool calling:
     * - Tools can retrieve documents, templates, and examples as needed
     * - Tool responses are cached to avoid duplicate calls with identical parameters
     * - Infinite loop protection prevents excessive tool calls
     *
     * @param string $command The command name for loading command-specific system prompts
     * @param string $prompt The prompt to send to the LLM as user message
     * @param array $metadata Optional metadata containing template, examples, and snippets
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @return string The response content from the LLM
     * @throws \Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true)
    {
        global $conf;

        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadSystemPrompt($command, []);

        // Enhance the prompt with context information from metadata
        $hasContext = !empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']);
        if ($useContext && $hasContext) {
            $contextInfo = "\n\n<context>\n";

            // Add template content if specified in metadata
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Add example pages content if specified in metadata
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                // A single page ID given as a string is treated as a one-element list
                foreach ((array) $metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Add text snippets (from ChromaDB) if specified in metadata
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                foreach ($metadata['snippets'] as $index => $snippet) {
                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // Prepend context information to the user prompt
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        // Check if tools should be used based on configuration
        $useTools = $conf['plugin']['dokullm']['use_tools'] ?? false;

        // Prepare API request data with model parameters
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt]
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            // Honor the configured setting instead of always requesting thinking
            'think' => (bool) $this->think
        ];

        // Add tools to the request only if useTools is true
        if ($useTools) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Only add sampling parameters that are defined and not null
        $sampling = [
            'temperature' => $this->temperature,
            'top_p' => $this->top_p,
            'top_k' => $this->top_k,
            'min_p' => $this->min_p
        ];
        foreach ($sampling as $name => $value) {
            if ($value !== null) {
                $data[$name] = $value;
            }
        }

        // Start every conversation with fresh loop-protection counters, so a
        // previous request that ended with an exception cannot leak its counts
        $this->toolCallCounts = [];

        return $this->callAPIWithTools($data, false);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Processes tool calls made by the LLM and returns appropriate responses.
     * Implements caching to avoid duplicate calls with identical parameters;
     * the cache key covers the tool name, its arguments and the current text,
     * because get_template and get_examples depend on the current text.
     *
     * The returned message carries an extra `cached` flag (true when the
     * content came from the cache).
     *
     * @param array $toolCall The tool call data from the LLM
     * @return array The tool response message
     */
    private function handleToolCall($toolCall)
    {
        $toolName = $toolCall['function']['name'] ?? '';
        $toolCallId = $toolCall['id'] ?? '';

        // Arguments normally arrive as a JSON string; accept an already decoded
        // array as well and fall back to no arguments when decoding fails
        $rawArguments = $toolCall['function']['arguments'] ?? '';
        $arguments = is_array($rawArguments) ? $rawArguments : json_decode((string) $rawArguments, true);
        if (!is_array($arguments)) {
            $arguments = [];
        }

        // Create a cache key from the tool name, arguments and current text
        $cacheKey = md5($toolName . serialize($arguments) . md5((string) $this->currentText));

        // Return the cached result, updated with the current tool call ID
        if (isset($this->toolCallCache[$cacheKey])) {
            $toolResponse = $this->toolCallCache[$cacheKey];
            $toolResponse['tool_call_id'] = $toolCallId;
            $toolResponse['cached'] = true;
            return $toolResponse;
        }

        switch ($toolName) {
            case 'get_document':
                // NOTE(review): the page ID is chosen by the LLM and read without
                // an ACL check here - confirm that this is intended
                $documentId = isset($arguments['id']) ? (string) $arguments['id'] : '';
                $content = $documentId === '' ? false : $this->getPageContent($documentId);
                $toolContent = $content === false ? 'Document not found: ' . $documentId : $content;
                break;

            case 'get_template':
                // Template suggested for the current text
                $toolContent = $this->getTemplateContent();
                break;

            case 'get_examples':
                // Clamp to the 1-20 range advertised in the tool definition
                $count = isset($arguments['count']) ? (int) $arguments['count'] : 5;
                $count = max(1, min(20, $count));
                // Double quotes so that \n is a real line break
                $toolContent = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolContent = 'Unknown tool: ' . $toolName;
        }

        // Cache only the parts that do not change per call
        $this->toolCallCache[$cacheKey] = [
            'role' => 'tool',
            'content' => $toolContent
        ];

        return [
            'role' => 'tool',
            'tool_call_id' => $toolCallId,
            'cached' => false,
            'content' => $toolContent
        ];
    }

    /**
     * Make an API call with tool responses
     *
     * Sends a follow-up request to the LLM with tool responses.
     * Implements complex logic for handling tool calls with caching and loop protection.
*
     * Complex logic includes:
     * 1. Making HTTP requests with proper authentication and error handling
     * 2. Processing tool calls from the LLM response
     * 3. Caching tool responses to avoid duplicate calls with identical parameters
     * 4. Tracking tool call counts to prevent infinite loops
     * 5. Implementing loop protection with call count limits
     * 6. Handling recursive tool calls until final content is generated
     *
     * Loop protection works by:
     * - Tracking individual tool call counts (max 3 per tool)
     * - Tracking total tool calls (max 10 total)
     * - Disabling tools when limits are exceeded to break potential loops
     *
     * Tool calls in a response are processed before the content is looked at,
     * because a response may carry an empty content string next to its tool
     * calls. Once tools have been disabled, tool calls are no longer followed.
     *
     * @param array $data The API request data including messages with tool responses
     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
     * @param bool $useTools Whether to process tool calls (used for loop protection)
     * @return string The final response content
     * @throws \Exception If the request cannot be encoded or sent, the HTTP
     *                    status is not 200, or the response has an unexpected format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // Set up HTTP headers, including authentication if API key is configured
        $headers = ['Content-Type: application/json'];
        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // Once tools are disabled, drop every tool-related request field
        if ($toolsCalled) {
            unset($data['tools'], $data['tool_choice'], $data['parallel_tool_calls']);
        }

        $payload = json_encode($data);
        if ($payload === false) {
            throw new \Exception('API request failed: ' . json_last_error_msg());
        }

        // Initialize and configure cURL for the API request
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        // Execute the API request
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Handle cURL errors (fully qualified: this file lives in a namespace)
        if ($error) {
            throw new \Exception('API request failed: ' . $error);
        }

        // Handle HTTP errors
        if ($httpCode !== 200) {
            throw new \Exception('API request failed with HTTP code: ' . $httpCode);
        }

        // Parse the JSON response; a malformed body ends in the format exception below
        $result = json_decode((string) $response, true);
        $message = $result['choices'][0]['message'] ?? null;
        if (!is_array($message)) {
            $message = [];
        }

        $toolCalls = (!empty($message['tool_calls']) && is_array($message['tool_calls']))
            ? $message['tool_calls']
            : [];

        // Handle tool calls first, as long as tools are still allowed
        if ($useTools && !$toolsCalled && !empty($toolCalls)) {
            // Assistant message with all original fields except content
            $assistantMessage = $message;
            unset($assistantMessage['content']);

            $messages = $data['messages'];
            $messages[] = $assistantMessage;

            // Process each tool call and track counts to prevent infinite loops
            foreach ($toolCalls as $toolCall) {
                $toolName = $toolCall['function']['name'] ?? '';
                $this->toolCallCounts[$toolName] = ($this->toolCallCounts[$toolName] ?? 0) + 1;

                $toolResponse = $this->handleToolCall($toolCall);
                // 'cached' is an internal marker and not part of the API message
                unset($toolResponse['cached']);
                $messages[] = $toolResponse;
            }

            // Disable tools when one tool was called more than 3 times or
            // more than 10 calls were made in total
            $limitReached = max($this->toolCallCounts) > 3 || array_sum($this->toolCallCounts) > 10;

            // Make another API call with tool responses
            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $limitReached, $useTools);
        }

        // Extract the content from the response if available
        if (isset($message['content']) && is_string($message['content'])) {
            // Reset tool call counts when we get final content
            $this->toolCallCounts = [];
            return trim($message['content']);
        }

        // Throw exception for unexpected response format
        throw new \Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:prompts:LANGUAGE:PROMPT_NAME
     *
     * The method implements a language fallback mechanism:
     * 1. First tries to load the prompt in the configured language
     * 2. If not found, falls back to English prompts
     * 3. Throws an exception if neither is available
     *
     * After loading the prompt, it scans for placeholders and automatically
     * adds missing ones with appropriate values before replacing all placeholders.
     * All placeholders are replaced in a single pass, so placeholder-like text
     * inside a substituted value (for example the user's text) is left alone.
     * Array values are joined with newlines; other non-scalar values are skipped.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array $variables Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws \Exception If the prompt page cannot be loaded in any language
     */
    private function loadPrompt($promptName, $variables = [])
    {
        global $conf;
        $language = $conf['plugin']['dokullm']['language'] ?? '';

        // Default to 'en' if language is 'default' or not set
        if ($language === 'default' || empty($language)) {
            $language = 'en';
        }

        // Try the configured language first
        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // If the language-specific prompt doesn't exist, try English as fallback
        if ($prompt === false && $language !== 'en') {
            $promptPageId = 'dokullm:prompts:en:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        // If still no prompt found, throw an exception
        if ($prompt === false) {
            throw new \Exception('Prompt page not found: ' . $promptPageId);
        }

        // Add missing placeholders with appropriate values
        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // Skip if already provided in variables
            if (isset($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    // Use page_template when given, otherwise a suggested template
                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template'] ?? null);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->getSnippets(10);
                    break;

                case 'examples':
                    // Content of the example pages listed in page_examples
                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples'] ?? []);
                    break;

                case 'previous':
                    // Content of the previous report page, plus both dates
                    $previousPage = $variables['page_previous'] ?? '';
                    $variables[$placeholder] = $this->getPreviousContent($previousPage);
                    $variables['current_date'] = $this->getPageDate();
                    $variables['previous_date'] = !empty($previousPage) ? $this->getPageDate($previousPage) : '';
                    break;

                default:
                    // Unknown placeholders are replaced by an empty string
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Build the {placeholder_name} => value map
        $replacements = [];
        foreach ($variables as $name => $value) {
            if (is_array($value)) {
                $value = implode("\n", array_filter($value, 'is_scalar'));
            }
            if ($value === null || is_scalar($value)) {
                $replacements['{' . $name . '}'] = (string) $value;
            }
        }

        // strtr() never rescans text it has already inserted
        return empty($replacements) ? $prompt : strtr($prompt, $replacements);
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the main system prompt and appends the prompt named
     * ACTION:system, separated by a newline, when that prompt can be loaded.
     *
     * @param string $action The action/command name
     * @param array $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws \Exception If the main system prompt cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadPrompt('system', $variables);

        if (empty($action)) {
            return $systemPrompt;
        }

        try {
            $systemPrompt .= "\n" . $this->loadPrompt($action . ':system', $variables);
        } catch (\Exception $e) {
            // A command-specific system prompt is optional; keep the main one
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Retrieves the raw content of a DokuWiki page by its ID.
     * Used for loading template and example page content for context.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // Convert page ID to file path
        $pageFile = wikiFN($pageId);

        if (!file_exists($pageFile) || !is_readable($pageFile)) {
            return false;
        }

        return file_get_contents($pageFile);
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Attempts to extract a date in YYmmdd format from the page ID.
     * If not found, uses the file's last modification timestamp.
*
     * Digits that do not form a real calendar date (for example the start of
     * an eight-digit number) are ignored and the file timestamp is used.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD), or an empty string if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        global $ID;

        // Use provided page ID or current page ID
        $targetPageId = $pageId ?: $ID;
        if (empty($targetPageId)) {
            return '';
        }

        // Try to extract date from page ID (looking for YYmmdd pattern)
        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
            // Assume 20xx for years 00-69, 19xx for years 70-99
            $fullYear = (intval($matches[1]) <= 69 ? '20' : '19') . $matches[1];

            // Accept the match only when it is a valid calendar date
            if (checkdate((int) $matches[2], (int) $matches[3], (int) $fullYear)) {
                return $fullYear . '-' . $matches[2] . '-' . $matches[3];
            }
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            return date('Y-m-d', filemtime($pageFile));
        }

        // Return empty string if no date can be determined
        return '';
    }

    /**
     * Get current text
     *
     * Returns the text most recently stored in $currentText by one of the
     * public processing methods.
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns an array of unique placeholder names.
*
     * A placeholder name may contain letters, digits, '_', '.', ':' and '-'.
     * Other brace pairs (for example JSON or code samples inside a prompt)
     * are not reported as placeholders.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text, in order of first appearance
     */
    public function findPlaceholders($text)
    {
        if (!preg_match_all('/\{([A-Za-z0-9_.:-]+)\}/', (string) $text, $matches)) {
            return [];
        }

        // array_unique() keeps the original keys; reindex to get a plain list
        return array_values(array_unique($matches[1]));
    }

    /**
     * Get template content for the current text
     *
     * Convenience function to retrieve template content. If a pageId is provided
     * and readable, returns the content of that page. Otherwise, calls
     * queryChromaDBTemplate() with the current text and returns the content of
     * the first suggested page.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content, or '( no template )' if none could be loaded
     */
    private function getTemplateContent($pageId = null)
    {
        // If pageId is provided, use it directly
        if (!empty($pageId)) {
            $templateContent = $this->getPageContent($pageId);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        // Otherwise, get template suggestion for the current text
        $suggestions = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($suggestions) && isset($suggestions[0])) {
            $templateContent = $this->getPageContent($suggestions[0]);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Convenience function to retrieve relevant snippets for the current text.
     * Queries ChromaDB for relevant snippets and returns them formatted.
*
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets content, or '( no examples )' if none were found
     */
    private function getSnippets($count = 10)
    {
        // Get example snippets for the current text
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            // Double-quoted "\n" is required: inside single quotes PHP emits a
            // literal backslash followed by "n" instead of a line break.
            $formattedSnippets[] = '<example id="' . ($index + 1) . '">' . "\n" . $snippet . "\n" . '</example>';
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Convenience function to retrieve content from example pages.
     * Returns the content of each readable page packed in XML elements.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples content, or '( no examples )' if no IDs were
     *                given or none of the pages could be read
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            // Pages that cannot be read are skipped
            if ($content !== false) {
                // Double-quoted "\n" so real line breaks are emitted (see getSnippets)
                $examplesContent[] = '<example_page source="' . $exampleId . '">' . "\n" . $content . "\n" . '</example_page>';
            }
        }

        // Use the same placeholder as the empty-input case when nothing could be loaded
        if (empty($examplesContent)) {
            return '( no examples )';
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * Convenience function to retrieve content from a previous report page.
     * Returns the content of the previous page or a default message if not found.
*
     * @param string $previousId Previous page ID
     * @return string Previous report content or default message if not found
     */
    private function getPreviousContent($previousId = '')
    {
        // Nothing to look up when no previous page was given
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $previousReport = $this->getPageContent($previousId);

        return $previousReport === false ? '( previous report not found )' : $previousReport;
    }

    /**
     * Get ChromaDB client with configuration
     *
     * Builds a ChromaDB client from the CHROMA_* constants (each with a built-in
     * fallback value) and picks the collection name from the first segment of the
     * current page ID. The 'playground' namespace and empty IDs use the default
     * collection.
     *
     * @return array Two-element array: the ChromaDB client and the collection name
     */
    private function getChromaDBClient()
    {
        // Pull in config.php, which may define the CHROMA_* constants read below.
        // NOTE(review): the path is relative, so resolution depends on include_path
        // and the calling script's directory - confirm this is intended.
        require_once 'config.php';

        $host = defined('CHROMA_HOST') ? CHROMA_HOST : 'localhost';
        $port = defined('CHROMA_PORT') ? CHROMA_PORT : 8000;
        $tenant = defined('CHROMA_TENANT') ? CHROMA_TENANT : 'dokullm';
        $database = defined('CHROMA_DATABASE') ? CHROMA_DATABASE : 'dokullm';
        $fallbackCollection = defined('CHROMA_COLLECTION') ? CHROMA_COLLECTION : 'documents';

        global $ID;

        // Start from the default and override only for a usable, non-playground namespace
        $collection = $fallbackCollection;
        if (!empty($ID)) {
            $namespace = explode(':', $ID)[0];
            if (!empty($namespace) && $namespace !== 'playground') {
                $collection = $namespace;
            }
        }

        $client = new \dokuwiki\plugin\dokullm\ChromaDBClient($host, $port, $tenant, $database);

        return [$client, $collection];
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Generates embeddings for the input text and queries ChromaDB for similar documents.
     * Extracts modality from the current page ID to use as the collection name.
*
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs (empty if nothing matched or the query failed)
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Only one query text was sent, so only the first ID list is read.
            // IDs are used exactly as ChromaDB returns them, without conversion.
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                return array_values($results['ids'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // The leading backslash matters: this file is namespaced, so a bare
            // "Exception" would refer to a class in this namespace and never match.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Generates embeddings for the input text and queries ChromaDB for similar documents.
     * Returns the actual text snippets instead of document IDs.
*
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets (empty if nothing matched or the query failed)
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Only one query text was sent, so only the first document list is read
            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                return array_values($results['documents'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // The leading backslash matters: this file is namespaced, so a bare
            // "Exception" would refer to a class in this namespace and never match.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries ChromaDB for the single closest document to the input text, restricted
     * by the metadata filter 'type' => 'template'. Any trailing chunk suffix
     * (e.g. "@2") is stripped from the returned ID to obtain the base document ID.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }

}