1<?php 2namespace dokuwiki\plugin\dokullm; 3 4/** 5 * LLM Client for the dokullm plugin 6 * 7 * This class provides methods to interact with an LLM API for various 8 * text processing tasks such as completion, rewriting, grammar correction, 9 * summarization, conclusion creation, text analysis, and custom prompts. 10 * 11 * The client handles: 12 * - API configuration and authentication 13 * - Prompt template loading and processing 14 * - Context-aware requests with metadata 15 * - DokuWiki page content retrieval 16 */ 17 18// must be run within Dokuwiki 19if (!defined('DOKU_INC')) { 20 die(); 21} 22 23/** 24 * LLM Client class for handling API communications 25 * 26 * Manages configuration settings and provides methods for various 27 * text processing operations through an LLM API. 28 * Implements caching for tool calls to avoid duplicate processing. 29 */ 30class LlmClient 31{ 32 /** @var string The API endpoint URL */ 33 private $api_url; 34 35 /** @var array Cache for tool call results */ 36 private $toolCallCache = []; 37 38 /** @var string Current text for tool usage */ 39 private $currentText = ''; 40 41 /** @var array Track tool call counts to prevent infinite loops */ 42 private $toolCallCounts = []; 43 44 /** @var string The API authentication key */ 45 private $api_key; 46 47 /** @var string The model identifier to use */ 48 private $model; 49 50 /** @var int The request timeout in seconds */ 51 private $timeout; 52 53 /** @var float The temperature setting for response randomness */ 54 private $temperature; 55 56 /** @var float The top-p setting for nucleus sampling */ 57 private $top_p; 58 59 /** @var int The top-k setting for token selection */ 60 private $top_k; 61 62 /** @var float The min-p setting for minimum probability threshold */ 63 private $min_p; 64 65 /** @var bool Whether to enable thinking in the LLM responses */ 66 private $think; 67 68 /** 69 * Initialize the LLM client with configuration settings 70 * 71 * Retrieves configuration 
values from DokuWiki's configuration system 72 * for API URL, key, model, timeout, and LLM sampling parameters. 73 * 74 * Configuration values: 75 * - api_url: The LLM API endpoint URL 76 * - api_key: Authentication key for the API (optional) 77 * - model: The model identifier to use for requests 78 * - timeout: Request timeout in seconds 79 * - language: Language code for prompt templates 80 * - temperature: Temperature setting for response randomness (0.0-1.0) 81 * - top_p: Top-p (nucleus sampling) setting (0.0-1.0) 82 * - top_k: Top-k setting (integer >= 1) 83 * - min_p: Minimum probability threshold (0.0-1.0) 84 * - think: Whether to enable thinking in LLM responses (boolean) 85 */ 86 public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null) 87 { 88 $this->api_url = $api_url; 89 $this->api_key = $api_key; 90 $this->model = $model; 91 $this->timeout = $timeout; 92 $this->temperature = $temperature; 93 $this->top_p = $top_p; 94 $this->top_k = $top_k; 95 $this->min_p = $min_p; 96 $this->think = $think; 97 } 98 99 100 101 public function process($action, $language, $text, $metadata = [], $useContext = true) 102 { 103 // Store the current text for tool usage 104 $this->currentText = $text; 105 106 // Add text, think and action to metadata 107 $metadata['text'] = $text; 108 $metadata['think'] = $this->think ? 
'/think' : '/no_think'; 109 $metadata['action'] = $action; 110 111 // If we have 'template' in metadata, move it to 'page_template' 112 if (isset($metadata['template'])) { 113 $metadata['page_template'] = $metadata['template']; 114 unset($metadata['template']); 115 } 116 117 // If we have 'examples' in metadata, move it to 'page_examples' 118 if (isset($metadata['examples'])) { 119 $metadata['page_examples'] = $metadata['examples']; 120 unset($metadata['examples']); 121 } 122 123 // If we have 'previous' in metadata, move it to 'page_previous' 124 if (isset($metadata['previous'])) { 125 $metadata['page_previous'] = $metadata['previous']; 126 unset($metadata['previous']); 127 } 128 129 $prompt = $this->loadPrompt($action, $language, $metadata); 130 131 return $this->callAPI($action, $language, $prompt, $metadata, $useContext); 132 } 133 134 135 136 /** 137 * Create the provided text using the LLM 138 * 139 * Sends a prompt to the LLM asking it to create the given text. 140 * First queries ChromaDB for relevant documents to include as examples. 141 * If no template is defined, queries ChromaDB for a template. 
142 * 143 * @param string $text The text to create 144 * @param array $metadata Optional metadata containing template, examples, and snippets 145 * @param bool $useContext Whether to include template and examples in the context (default: true) 146 * @return string The created text 147 */ 148 public function createReport($text, $metadata = [], $useContext = true, $useTools = false) 149 { 150 // Store the current text for tool usage 151 $this->currentText = $text; 152 153 // Only try to find template and add snippets if tools are not enabled 154 // When tools are enabled, the LLM will call get_template and get_examples as needed 155 if (!$useTools) { 156 // If no template is defined, try to find one using ChromaDB 157 if (empty($metadata['template'])) { 158 $templateResult = $this->queryChromaDBTemplate($text); 159 if (!empty($templateResult)) { 160 // Use the first result as template 161 $metadata['template'] = $templateResult[0]; 162 } 163 } 164 165 // Query ChromaDB for relevant documents to use as examples 166 $chromaResults = $this->queryChromaDBSnippets($text, 10); 167 168 // Add ChromaDB results to metadata as snippets 169 if (!empty($chromaResults)) { 170 // Merge with existing snippets 171 $metadata['snippets'] = array_merge( 172 isset($metadata['snippets']) ? $metadata['snippets'] : [], 173 $chromaResults 174 ); 175 } 176 } 177 178 $think = $this->think ? '/think' : '/no_think'; 179 $prompt = $this->loadPrompt('create', ['text' => $text, 'think' => $think]); 180 181 return $this->callAPI('create', $language, $prompt, $metadata, $useContext); 182 } 183 184 /** 185 * Compare two texts and highlight differences 186 * 187 * Sends a prompt to the LLM asking it to compare two texts and 188 * highlight their similarities and differences. 
189 * 190 * @param string $text The current text to compare 191 * @param array $metadata Optional metadata containing template, examples, and previous report reference 192 * @return string The comparison results 193 */ 194 public function compareText($text, $metadata = [], $useContext = false) 195 { 196 // Store the current text for tool usage 197 $this->currentText = $text; 198 199 // Load previous report from metadata if specified 200 $previousText = ''; 201 if (!empty($metadata['previous_report_page'])) { 202 $previousText = $this->getPageContent($metadata['previous_report_page']); 203 if ($previousText === false) { 204 $previousText = ''; 205 } 206 } 207 208 // Extract dates for placeholders 209 $currentDate = $this->getPageDate(); 210 $previousDate = !empty($metadata['previous_report_page']) ? 211 $this->getPageDate($metadata['previous_report_page']) : 212 ''; 213 214 $think = $this->think ? '/think' : '/no_think'; 215 $prompt = $this->loadPrompt('compare', [ 216 'text' => $text, 217 'previous_text' => $previousText, 218 'current_date' => $currentDate, 219 'previous_date' => $previousDate, 220 'think' => $think 221 ]); 222 223 return $this->callAPI('compare', $language, $prompt, $metadata, $useContext); 224 } 225 226 /** 227 * Process text with a custom user prompt 228 * 229 * Sends a custom prompt to the LLM along with the provided text. 230 * 231 * @param string $text The text to process 232 * @param string $customPrompt The custom prompt to use 233 * @param array $metadata Optional metadata containing template and examples 234 * @param bool $useContext Whether to include template and examples in the context (default: true) 235 * @return string The processed text 236 */ 237 public function processCustomPrompt($text, $metadata = [], $useContext = true) 238 { 239 // Store the current text for tool usage 240 $this->currentText = $text; 241 242 // Format the prompt with the text and custom prompt 243 $prompt = $metadata['prompt'] . "\n\nText to process:\n" . 
$text; 244 245 return $this->callAPI('custom', $language, $prompt, $metadata, $useContext); 246 } 247 248 /** 249 * Get the list of available tools for the LLM 250 * 251 * Defines the tools that can be used by the LLM during processing. 252 * 253 * @return array List of tool definitions 254 */ 255 private function getAvailableTools() 256 { 257 return [ 258 [ 259 'type' => 'function', 260 'function' => [ 261 'name' => 'get_document', 262 'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.', 263 'parameters' => [ 264 'type' => 'object', 265 'properties' => [ 266 'id' => [ 267 'type' => 'string', 268 'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.' 269 ] 270 ], 271 'required' => ['id'] 272 ] 273 ] 274 ], 275 [ 276 'type' => 'function', 277 'function' => [ 278 'name' => 'get_template', 279 'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.', 280 'parameters' => [ 281 'type' => 'object', 282 'properties' => [ 283 'language' => [ 284 'type' => 'string', 285 'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).', 286 'default' => 'ro' 287 ] 288 ] 289 ] 290 ] 291 ], 292 [ 293 'type' => 'function', 294 'function' => [ 295 'name' => 'get_examples', 296 'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. 
Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.', 297 'parameters' => [ 298 'type' => 'object', 299 'properties' => [ 300 'count' => [ 301 'type' => 'integer', 302 'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.', 303 'default' => 5 304 ] 305 ] 306 ] 307 ] 308 ] 309 ]; 310 } 311 312 /** 313 * Call the LLM API with the specified prompt 314 * 315 * Makes an HTTP POST request to the configured API endpoint with 316 * the prompt and other parameters. Handles authentication if an 317 * API key is configured. 318 * 319 * The method constructs a conversation with system and user messages, 320 * including context information from metadata when available. 321 * 322 * Complex logic includes: 323 * 1. Loading and enhancing the system prompt with metadata context 324 * 2. Building the API request with model parameters 325 * 3. Handling authentication with API key if configured 326 * 4. Making the HTTP request with proper error handling 327 * 5. Parsing and validating the API response 328 * 6. Supporting tool usage with automatic tool calling when enabled 329 * 7. 
Implementing context enhancement with templates, examples, and snippets 330 * 331 * The context information includes: 332 * - Template content: Used as a starting point for the response 333 * - Example pages: Full content of specified example pages 334 * - Text snippets: Relevant text examples from ChromaDB 335 * 336 * When tools are enabled, the method supports automatic tool calling: 337 * - Tools can retrieve documents, templates, and examples as needed 338 * - Tool responses are cached to avoid duplicate calls with identical parameters 339 * - Infinite loop protection prevents excessive tool calls 340 * 341 * @param string $command The command name for loading command-specific system prompts 342 * @param string $prompt The prompt to send to the LLM as user message 343 * @param array $metadata Optional metadata containing template, examples, and snippets 344 * @param bool $useContext Whether to include template and examples in the context (default: true) 345 * @return string The response content from the LLM 346 * @throws Exception If the API request fails or returns unexpected format 347 */ 348 349 private function callAPI($command, $language, $prompt, $metadata = [], $useContext = true, $useTools = false) 350 { 351 // Load system prompt which provides general instructions to the LLM 352 $systemPrompt = $this->loadSystemPrompt($command, $language, []); 353 354 // Enhance the prompt with context information from metadata 355 // This provides the LLM with additional context about templates and examples 356 if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) { 357 $contextInfo = "\n\n<context>\n"; 358 359 // Add template content if specified in metadata 360 if (!empty($metadata['template'])) { 361 $templateContent = $this->getPageContent($metadata['template']); 362 if ($templateContent !== false) { 363 $contextInfo .= "\n\n<template>\nPornește de la acest template (" . 
$metadata['template'] . "):\n" . $templateContent . "\n</template>\n"; 364 } 365 } 366 367 // Add example pages content if specified in metadata 368 if (!empty($metadata['examples'])) { 369 $examplesContent = []; 370 foreach ($metadata['examples'] as $example) { 371 $content = $this->getPageContent($example); 372 if ($content !== false) { 373 $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n"; 374 } 375 } 376 if (!empty($examplesContent)) { 377 $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n"; 378 } 379 } 380 381 // Add text snippets if specified in metadata 382 if (!empty($metadata['snippets'])) { 383 $snippetsContent = []; 384 foreach ($metadata['snippets'] as $index => $snippet) { 385 // These are text snippets from ChromaDB 386 $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n"; 387 } 388 if (!empty($snippetsContent)) { 389 $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n"; 390 } 391 } 392 393 $contextInfo .= "\n</context>\n"; 394 395 // Append context information to system prompt 396 $prompt = $contextInfo . "\n\n" . 
$prompt; 397 } 398 399 // Prepare API request data with model parameters 400 $data = [ 401 'model' => $this->model, 402 'messages' => [ 403 ['role' => 'system', 'content' => $systemPrompt], 404 ['role' => 'user', 'content' => $prompt] 405 ], 406 'max_tokens' => 6144, 407 'stream' => false, 408 'keep_alive' => '30m', 409 'think' => true 410 ]; 411 412 // Add tools to the request only if useTools is true 413 if ($useTools) { 414 // Define available tools 415 $data['tools'] = $this->getAvailableTools(); 416 $data['tool_choice'] = 'auto'; 417 $data['parallel_tool_calls'] = false; 418 } 419 420 // Only add parameters if they are defined and not null 421 if ($this->temperature !== null) { 422 $data['temperature'] = $this->temperature; 423 } 424 if ($this->top_p !== null) { 425 $data['top_p'] = $this->top_p; 426 } 427 if ($this->top_k !== null) { 428 $data['top_k'] = $this->top_k; 429 } 430 if ($this->min_p !== null) { 431 $data['min_p'] = $this->min_p; 432 } 433 434 // Make an API call with tool responses 435 return $this->callAPIWithTools($data, false); 436 } 437 438 /** 439 * Handle tool calls from the LLM 440 * 441 * Processes tool calls made by the LLM and returns appropriate responses. 442 * Implements caching to avoid duplicate calls with identical parameters. 443 * 444 * @param array $toolCall The tool call data from the LLM 445 * @return array The tool response message 446 */ 447 private function handleToolCall($toolCall) 448 { 449 $toolName = $toolCall['function']['name']; 450 $arguments = json_decode($toolCall['function']['arguments'], true); 451 452 // Create a cache key from the tool name and arguments 453 $cacheKey = md5($toolName . 
serialize($arguments)); 454 455 // Check if we have a cached result for this tool call 456 if (isset($this->toolCallCache[$cacheKey])) { 457 // Return cached result and indicate it was found in cache 458 $toolResponse = $this->toolCallCache[$cacheKey]; 459 // Update with current tool call ID 460 $toolResponse['tool_call_id'] = $toolCall['id']; 461 $toolResponse['cached'] = true; // Indicate this response was cached 462 return $toolResponse; 463 } 464 465 $toolResponse = [ 466 'role' => 'tool', 467 'tool_call_id' => $toolCall['id'], 468 'cached' => false // Indicate this is a fresh response 469 ]; 470 471 switch ($toolName) { 472 case 'get_document': 473 $documentId = $arguments['id']; 474 $content = $this->getPageContent($documentId); 475 if ($content === false) { 476 $toolResponse['content'] = 'Document not found: ' . $documentId; 477 } else { 478 $toolResponse['content'] = $content; 479 } 480 break; 481 482 case 'get_template': 483 // Get template content using the convenience function 484 $toolResponse['content'] = $this->getTemplateContent(); 485 break; 486 487 case 'get_examples': 488 // Get examples content using the convenience function 489 $count = isset($arguments['count']) ? (int)$arguments['count'] : 5; 490 $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>'; 491 break; 492 493 default: 494 $toolResponse['content'] = 'Unknown tool: ' . $toolName; 495 } 496 497 // Cache the result for future calls with the same parameters 498 $cacheEntry = $toolResponse; 499 // Remove tool_call_id and cached flag from cache as they change per call 500 unset($cacheEntry['tool_call_id']); 501 unset($cacheEntry['cached']); 502 $this->toolCallCache[$cacheKey] = $cacheEntry; 503 504 return $toolResponse; 505 } 506 507 /** 508 * Make an API call with tool responses 509 * 510 * Sends a follow-up request to the LLM with tool responses. 511 * Implements complex logic for handling tool calls with caching and loop protection. 
512 * 513 * Complex logic includes: 514 * 1. Making HTTP requests with proper authentication and error handling 515 * 2. Processing tool calls from the LLM response 516 * 3. Caching tool responses to avoid duplicate calls with identical parameters 517 * 4. Tracking tool call counts to prevent infinite loops 518 * 5. Implementing loop protection with call count limits 519 * 6. Handling recursive tool calls until final content is generated 520 * 521 * Loop protection works by: 522 * - Tracking individual tool call counts (max 3 per tool) 523 * - Tracking total tool calls (max 10 total) 524 * - Disabling tools when limits are exceeded to break potential loops 525 * 526 * @param array $data The API request data including messages with tool responses 527 * @param bool $toolsCalled Whether tools have already been called (used for loop protection) 528 * @param bool $useTools Whether to process tool calls (used for loop protection) 529 * @return string The final response content 530 */ 531 private function callAPIWithTools($data, $toolsCalled = false, $useTools = true) 532 { 533 // Set up HTTP headers, including authentication if API key is configured 534 $headers = [ 535 'Content-Type: application/json' 536 ]; 537 538 if (!empty($this->api_key)) { 539 $headers[] = 'Authorization: Bearer ' . 
$this->api_key; 540 } 541 542 // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops 543 if ($toolsCalled) { 544 unset($data['tools']); 545 unset($data['tool_choice']); 546 } 547 548 // Initialize and configure cURL for the API request 549 $ch = curl_init(); 550 curl_setopt($ch, CURLOPT_URL, $this->api_url); 551 curl_setopt($ch, CURLOPT_POST, true); 552 curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data)); 553 curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); 554 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 555 curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout); 556 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true); 557 558 // Execute the API request 559 $response = curl_exec($ch); 560 $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); 561 $error = curl_error($ch); 562 curl_close($ch); 563 564 // Handle cURL errors 565 if ($error) { 566 throw new Exception('API request failed: ' . $error); 567 } 568 569 // Handle HTTP errors 570 if ($httpCode !== 200) { 571 throw new Exception('API request failed with HTTP code: ' . 
$httpCode); 572 } 573 574 // Parse and validate the JSON response 575 $result = json_decode($response, true); 576 577 // Extract the content from the response if available 578 if (isset($result['choices'][0]['message']['content'])) { 579 $content = trim($result['choices'][0]['message']['content']); 580 // Reset tool call counts when we get final content 581 $this->toolCallCounts = []; 582 return $content; 583 } 584 585 // Handle tool calls if present 586 if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) { 587 $toolCalls = $result['choices'][0]['message']['tool_calls']; 588 // Start with original messages 589 $messages = $data['messages']; 590 // Add assistant's message with tool calls, keeping all original fields except for content (which is null) 591 $assistantMessage = []; 592 foreach ($result['choices'][0]['message'] as $key => $value) { 593 if ($key !== 'content') { 594 $assistantMessage[$key] = $value; 595 } 596 } 597 // Add assistant's message with tool calls 598 $messages[] = $assistantMessage; 599 600 // Process each tool call and track counts to prevent infinite loops 601 foreach ($toolCalls as $toolCall) { 602 $toolName = $toolCall['function']['name']; 603 // Increment tool call count 604 if (!isset($this->toolCallCounts[$toolName])) { 605 $this->toolCallCounts[$toolName] = 0; 606 } 607 $this->toolCallCounts[$toolName]++; 608 609 $toolResponse = $this->handleToolCall($toolCall); 610 $messages[] = $toolResponse; 611 } 612 613 // Check if any tool has been called more than 3 times 614 $toolsCalledCount = 0; 615 foreach ($this->toolCallCounts as $count) { 616 if ($count > 3) { 617 // If any tool called more than 3 times, disable tools to break loop 618 $toolsCalled = true; 619 break; 620 } 621 $toolsCalledCount += $count; 622 } 623 624 // If total tool calls exceed 10, also disable tools 625 if ($toolsCalledCount > 10) { 626 $toolsCalled = true; 627 } 628 629 // Make another API call with tool responses 630 $data['messages'] = 
$messages; 631 return $this->callAPIWithTools($data, $toolsCalled, $useTools); 632 } 633 634 // Throw exception for unexpected response format 635 throw new Exception('Unexpected API response format'); 636 } 637 638 /** 639 * Load a prompt template from a DokuWiki page and replace placeholders 640 * 641 * Loads prompt templates from DokuWiki pages with IDs in the format 642 * dokullm:prompts:LANGUAGE:PROMPT_NAME 643 * 644 * The method implements a language fallback mechanism: 645 * 1. First tries to load the prompt in the configured language 646 * 2. If not found, falls back to English prompts 647 * 3. Throws an exception if neither is available 648 * 649 * After loading the prompt, it scans for placeholders and automatically 650 * adds missing ones with appropriate values before replacing all placeholders. 651 * 652 * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite') 653 * @param array $variables Associative array of placeholder => value pairs 654 * @return string The processed prompt with placeholders replaced 655 * @throws Exception If the prompt page cannot be loaded in any language 656 */ 657 private function loadPrompt($promptName, $language, $variables = []) 658 { 659 // Default to 'en' if language is 'default' or not set 660 if ($language === 'default' || empty($language)) { 661 $language = 'en'; 662 } 663 664 // Construct the page ID for the prompt in the configured language 665 $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName; 666 667 // Try to get the content of the prompt page in the configured language 668 $prompt = $this->getPageContent($promptPageId); 669 670 // If the language-specific prompt doesn't exist, try English as fallback 671 if ($prompt === false && $language !== 'en') { 672 $promptPageId = 'dokullm:prompts:en:' . 
$promptName; 673 $prompt = $this->getPageContent($promptPageId); 674 } 675 676 // If still no prompt found, throw an exception 677 if ($prompt === false) { 678 throw new Exception('Prompt page not found: ' . $promptPageId); 679 } 680 681 // Find placeholders in the prompt 682 $placeholders = $this->findPlaceholders($prompt); 683 684 // Add missing placeholders with appropriate values 685 foreach ($placeholders as $placeholder) { 686 // Skip if already provided in variables 687 if (isset($variables[$placeholder])) { 688 continue; 689 } 690 691 // Add appropriate values for specific placeholders 692 switch ($placeholder) { 693 case 'template': 694 // If we have a page_template in variables, use it 695 $variables[$placeholder] = $this->getTemplateContent($variables['page_template']); 696 break; 697 698 case 'snippets': 699 $variables[$placeholder] = $this->getSnippets(10); 700 break; 701 702 case 'examples': 703 // If we have example page IDs in metadata, add examples content 704 $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']); 705 break; 706 707 case 'previous': 708 // If we have a previous report page ID in metadata, add previous content 709 $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']); 710 711 // Add current and previous dates to metadata 712 $variables['current_date'] = $this->getPageDate(); 713 $variables['previous_date'] = !empty($variables['page_previous']) ? 714 $this->getPageDate($variables['page_previous']) : 715 ''; 716 break; 717 718 default: 719 // For other placeholders, leave them empty or set a default value 720 $variables[$placeholder] = ''; 721 break; 722 } 723 } 724 725 // Replace placeholders with actual values 726 // Placeholders are in the format {placeholder_name} 727 foreach ($variables as $placeholder => $value) { 728 $prompt = str_replace('{' . $placeholder . 
'}', $value, $prompt); 729 } 730 731 // Return the processed prompt 732 return $prompt; 733 } 734 735 /** 736 * Load system prompt with optional command-specific appendage 737 * 738 * Loads the main system prompt and appends any command-specific system prompt 739 * if available. 740 * 741 * @param string $action The action/command name 742 * @param array $variables Associative array of placeholder => value pairs 743 * @return string The combined system prompt 744 */ 745 private function loadSystemPrompt($action, $language, $variables = []) 746 { 747 // Load system prompt which provides general instructions to the LLM 748 $systemPrompt = $this->loadPrompt('system', $language, $variables); 749 750 // Check if there's a command-specific system prompt appendage 751 if (!empty($action)) { 752 try { 753 $commandSystemPrompt = $this->loadPrompt($action . ':system', $language, $variables); 754 if ($commandSystemPrompt !== false) { 755 $systemPrompt .= "\n" . $commandSystemPrompt; 756 } 757 } catch (Exception $e) { 758 // Ignore exceptions when loading command-specific system prompt 759 // This allows the main system prompt to still be used 760 } 761 } 762 763 return $systemPrompt; 764 } 765 766 /** 767 * Get the content of a DokuWiki page 768 * 769 * Retrieves the raw content of a DokuWiki page by its ID. 770 * Used for loading template and example page content for context. 771 * 772 * @param string $pageId The page ID to retrieve 773 * @return string|false The page content or false if not found/readable 774 */ 775 public function getPageContent($pageId) 776 { 777 // Convert page ID to file path 778 $pageFile = wikiFN($pageId); 779 780 // Check if file exists and is readable 781 if (file_exists($pageFile) && is_readable($pageFile)) { 782 return file_get_contents($pageFile); 783 } 784 785 return false; 786 } 787 788 /** 789 * Extract date from page ID or file timestamp 790 * 791 * Attempts to extract a date in YYmmdd format from the page ID. 
792 * If not found, uses the file's last modification timestamp. 793 * 794 * @param string $pageId Optional page ID to extract date from (defaults to current page) 795 * @return string Formatted date string (YYYY-MM-DD) 796 */ 797 private function getPageDate($pageId = null) 798 { 799 global $ID; 800 801 // Use provided page ID or current page ID 802 $targetPageId = $pageId ?: $ID; 803 804 // Try to extract date from page ID (looking for YYmmdd pattern) 805 if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) { 806 // Convert YYmmdd to YYYY-MM-DD 807 $year = $matches[1]; 808 $month = $matches[2]; 809 $day = $matches[3]; 810 811 // Assume 20xx for years 00-69, 19xx for years 70-99 812 $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year; 813 814 return $fullYear . '-' . $month . '-' . $day; 815 } 816 817 // Fallback to file timestamp 818 $pageFile = wikiFN($targetPageId); 819 if (file_exists($pageFile)) { 820 $timestamp = filemtime($pageFile); 821 return date('Y-m-d', $timestamp); 822 } 823 824 // Return empty string if no date can be determined 825 return ''; 826 } 827 828 /** 829 * Get current text 830 * 831 * Retrieves the current text stored from the process function. 832 * 833 * @return string The current text 834 */ 835 private function getCurrentText() 836 { 837 return $this->currentText; 838 } 839 840 /** 841 * Scan text for placeholders 842 * 843 * Finds all placeholders in the format {placeholder_name} in the provided text 844 * and returns an array of unique placeholder names. 
845 * 846 * @param string $text The text to scan for placeholders 847 * @return array List of unique placeholder names found in the text 848 */ 849 public function findPlaceholders($text) 850 { 851 $placeholders = []; 852 $pattern = '/\{([^}]+)\}/'; 853 854 if (preg_match_all($pattern, $text, $matches)) { 855 // Get unique placeholder names 856 $placeholders = array_unique($matches[1]); 857 } 858 859 return $placeholders; 860 } 861 862 /** 863 * Get template content for the current text 864 * 865 * Convenience function to retrieve template content. If a pageId is provided, 866 * retrieves content directly from that page. Otherwise, queries ChromaDB for 867 * a relevant template based on the current text. 868 * 869 * @param string|null $pageId Optional page ID to retrieve template from directly 870 * @return string The template content or empty string if not found 871 */ 872 private function getTemplateContent($pageId = null) 873 { 874 // If pageId is provided, use it directly 875 if ($pageId !== null) { 876 $templateContent = $this->getPageContent($pageId); 877 if ($templateContent !== false) { 878 return $templateContent; 879 } 880 } 881 882 // Otherwise, get template suggestion for the current text 883 $pageId = $this->queryChromaDBTemplate($this->getCurrentText()); 884 if (!empty($pageId)) { 885 $templateContent = $this->getPageContent($pageId[0]); 886 if ($templateContent !== false) { 887 return $templateContent; 888 } 889 } 890 return '( no template )'; 891 } 892 893 /** 894 * Get snippets content for the current text 895 * 896 * Convenience function to retrieve relevant snippets for the current text. 897 * Queries ChromaDB for relevant snippets and returns them formatted. 
898 * 899 * @param int $count Number of snippets to retrieve (default: 10) 900 * @return string Formatted snippets content or empty string if not found 901 */ 902 private function getSnippets($count = 10) 903 { 904 // Get example snippets for the current text 905 $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count); 906 if (!empty($snippets)) { 907 $formattedSnippets = []; 908 foreach ($snippets as $index => $snippet) { 909 $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>'; 910 } 911 return implode("\n", $formattedSnippets); 912 } 913 return '( no examples )'; 914 } 915 916 /** 917 * Get examples content from example page IDs 918 * 919 * Convenience function to retrieve content from example pages. 920 * Returns the content of each page packed in XML elements. 921 * 922 * @param array $exampleIds List of example page IDs 923 * @return string Formatted examples content or empty string if not found 924 */ 925 private function getExamplesContent($exampleIds = []) 926 { 927 if (empty($exampleIds) || !is_array($exampleIds)) { 928 return '( no examples )'; 929 } 930 931 $examplesContent = []; 932 foreach ($exampleIds as $index => $exampleId) { 933 $content = $this->getPageContent($exampleId); 934 if ($content !== false) { 935 $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>'; 936 } 937 } 938 939 return implode("\n", $examplesContent); 940 } 941 942 /** 943 * Get previous report content from previous page ID 944 * 945 * Convenience function to retrieve content from a previous report page. 946 * Returns the content of the previous page or a default message if not found. 
*
     * @param string $previousId Previous page ID
     * @return string Previous report content or default message if not found
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);

        return $content !== false ? $content : '( previous report not found )';
    }

    /**
     * Get ChromaDB client with configuration
     *
     * Creates a ChromaDBClient from the plugin configuration and picks the
     * collection to query: the first ':'-separated part of the global page ID,
     * unless that part is empty or 'playground', in which case the configured
     * default collection is used.
     *
     * @return array Two-element array: [ChromaDBClient instance, collection name]
     */
    private function getChromaDBClient()
    {
        // NOTE(review): the second getConf() argument looks like a fallback default —
        // confirm against getConf(), which is defined outside this section.
        $chromaHost = $this->getConf('chroma_host', 'localhost');
        $chromaPort = $this->getConf('chroma_port', 8000);
        $chromaTenant = $this->getConf('chroma_tenant', 'dokullm');
        $chromaDatabase = $this->getConf('chroma_database', 'dokullm');
        $chromaDefaultCollection = $this->getConf('chroma_collection', 'documents');
        $ollamaHost = $this->getConf('ollama_host', 'localhost');
        $ollamaPort = $this->getConf('ollama_port', 11434);
        $ollamaModel = $this->getConf('ollama_embeddings_model', 'nomic-embed-text');

        // Derive the collection from the top-level part of the current page ID
        global $ID;
        $chromaCollection = $chromaDefaultCollection;

        if (!empty($ID)) {
            $parts = explode(':', $ID);
            // 'playground' pages deliberately share the default collection
            if (!empty($parts[0]) && $parts[0] !== 'playground') {
                $chromaCollection = $parts[0];
            }
        }

        // Create ChromaDB client with all required parameters
        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $chromaHost,
            $chromaPort,
            $chromaTenant,
            $chromaDatabase,
            $ollamaHost,
            $ollamaPort,
            $ollamaModel
        );

        return [$chromaClient, $chromaCollection];
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Passes the text to ChromaDBClient::queryCollection() against the collection
     * chosen by getChromaDBClient() and returns the IDs of the first result set.
     * Failures are logged and reported as an empty list rather than propagated.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs (empty on failure or when nothing matched)
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // IDs are used exactly as ChromaDB returns them; only the first
            // result set is relevant because a single query text was sent.
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                return array_values($results['ids'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // The leading backslash is required: this file lives in a namespace, so
            // an unqualified "Exception" would refer to a class inside that namespace
            // and this handler would never catch anything.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Generates embeddings for the input text and queries ChromaDB for similar documents.
     * Returns the actual text snippets instead of document IDs.
*
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets (empty on failure or when nothing matched)
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Only the first result set matters because a single query text was sent.
            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                return array_values($results['documents'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // The leading backslash is required: this file lives in a namespace, so
            // an unqualified "Exception" would refer to a class inside that namespace
            // and this handler would never catch anything.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries ChromaDB for at most one document whose metadata matches
     * ['type' => 'template'] and strips a trailing chunk suffix from its ID.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }

}