1<?php 2namespace dokuwiki\plugin\dokullm; 3 4/** 5 * LLM Client for the dokullm plugin 6 * 7 * This class provides methods to interact with an LLM API for various 8 * text processing tasks such as completion, rewriting, grammar correction, 9 * summarization, conclusion creation, text analysis, and custom prompts. 10 * 11 * The client handles: 12 * - API configuration and authentication 13 * - Prompt template loading and processing 14 * - Context-aware requests with metadata 15 * - DokuWiki page content retrieval 16 */ 17 18// must be run within Dokuwiki 19if (!defined('DOKU_INC')) { 20 die(); 21} 22 23/** 24 * LLM Client class for handling API communications 25 * 26 * Manages configuration settings and provides methods for various 27 * text processing operations through an LLM API. 28 * Implements caching for tool calls to avoid duplicate processing. 29 */ 30class LlmClient 31{ 32 /** @var string The API endpoint URL */ 33 private $api_url; 34 35 /** @var array Cache for tool call results */ 36 private $toolCallCache = []; 37 38 /** @var string Current text for tool usage */ 39 private $currentText = ''; 40 41 /** @var array Track tool call counts to prevent infinite loops */ 42 private $toolCallCounts = []; 43 44 /** @var string The API authentication key */ 45 private $api_key; 46 47 /** @var string The model identifier to use */ 48 private $model; 49 50 /** @var int The request timeout in seconds */ 51 private $timeout; 52 53 /** @var float The temperature setting for response randomness */ 54 private $temperature; 55 56 /** @var float The top-p setting for nucleus sampling */ 57 private $top_p; 58 59 /** @var int The top-k setting for token selection */ 60 private $top_k; 61 62 /** @var float The min-p setting for minimum probability threshold */ 63 private $min_p; 64 65 /** @var bool Whether to enable thinking in the LLM responses */ 66 private $think; 67 68 /** 69 * Initialize the LLM client with configuration settings 70 * 71 * Retrieves configuration 
values from its arguments and stores them as given; no validation or
     * defaulting is performed here.
     *
     * @param string|null $api_url     The LLM API endpoint URL
     * @param string|null $api_key     Authentication key for the API (optional)
     * @param string|null $model       The model identifier to use for requests
     * @param int|null    $timeout     Request timeout in seconds
     * @param float|null  $temperature Temperature setting for response randomness
     * @param float|null  $top_p       Top-p (nucleus sampling) setting
     * @param int|null    $top_k       Top-k setting
     * @param float|null  $min_p       Minimum probability threshold
     * @param bool|null   $think       Whether to enable thinking in LLM responses
     * @param string|null $language    Language code for prompt templates
     */
    public function __construct(
        $api_url = null,
        $api_key = null,
        $model = null,
        $timeout = null,
        $temperature = null,
        $top_p = null,
        $top_k = null,
        $min_p = null,
        $think = null,
        $language = null
    ) {
        $this->api_url = $api_url;
        $this->api_key = $api_key;
        $this->model = $model;
        $this->timeout = $timeout;
        $this->temperature = $temperature;
        $this->top_p = $top_p;
        $this->top_k = $top_k;
        $this->min_p = $min_p;
        $this->think = $think;
        $this->language = $language;
    }

    /**
     * @var string|null Language code used to select prompt templates.
     *
     * Declared explicitly because the constructor assigns it; creating it
     * dynamically is deprecated as of PHP 8.2.
     */
    private $language;

    /**
     * Run a named prompt action on the given text
     *
     * Remembers the text for later tool usage, adds 'text', 'think' and
     * 'action' entries to the metadata, renames the 'template', 'examples'
     * and 'previous' entries to 'page_template', 'page_examples' and
     * 'page_previous', loads the prompt named after the action and sends it
     * to the API.
     *
     * @param string $action     The prompt/action name (used as prompt page name)
     * @param string $text       The text to process
     * @param array  $metadata   Optional metadata; also used as placeholder values
     * @param bool   $useContext Passed through to callAPI()
     * @return string The value returned by callAPI()
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Expose text, think switch and action as placeholder values
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Page references are moved to 'page_*' keys so that the bare names
        // stay free for the placeholders that loadPrompt() fills in itself.
        foreach (['template', 'examples', 'previous'] as $key) {
            if (isset($metadata[$key])) {
                $metadata['page_' . $key] = $metadata[$key];
                unset($metadata[$key]);
            }
        }

        $prompt = $this->loadPrompt($action, $this->language, $metadata);

        return $this->callAPI($action, $this->language, $prompt, $metadata, $useContext);
    }

    /**
     * Create the provided text using the LLM
     *
     * Sends a prompt to the LLM asking it to create the given text.
     * First queries ChromaDB for relevant documents to include as examples.
     * If no template is defined, queries ChromaDB for a template.
*
     * @param string $text       The text to create
     * @param array  $metadata   Optional metadata containing template, examples, and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @param bool   $useTools   Whether the LLM may call tools; when true the template/snippet
     *                           lookup is skipped here and left to the tool calls (default: false)
     * @return string The created text
     */
    public function createReport($text, $metadata = [], $useContext = true, $useTools = false)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Only pre-load template and snippets if tools are not enabled.
        // When tools are enabled, the LLM will call get_template and get_examples as needed.
        if (!$useTools) {
            // If no template is defined, try to find one using ChromaDB
            if (empty($metadata['template'])) {
                $templateResult = $this->queryChromaDBTemplate($text);
                if (!empty($templateResult)) {
                    // Use the first result as template
                    $metadata['template'] = $templateResult[0];
                }
            }

            // Query ChromaDB for relevant documents to use as examples
            $chromaResults = $this->queryChromaDBSnippets($text, 10);

            // Merge the ChromaDB results with any snippets the caller supplied
            if (!empty($chromaResults)) {
                $existingSnippets = isset($metadata['snippets']) ? $metadata['snippets'] : [];
                $metadata['snippets'] = array_merge($existingSnippets, $chromaResults);
            }
        }

        $think = $this->think ? '/think' : '/no_think';

        // loadPrompt() takes (name, language, variables); the language must be
        // passed explicitly or the variables array ends up in the page ID.
        $prompt = $this->loadPrompt('create', $this->language, ['text' => $text, 'think' => $think]);

        // Forward $useTools so the tool definitions are actually offered to the LLM
        return $this->callAPI('create', $this->language, $prompt, $metadata, $useContext, $useTools);
    }

    /**
     * Compare two texts and highlight differences
     *
     * Sends a prompt to the LLM asking it to compare two texts and
     * highlight their similarities and differences.
*
     * @param string $text       The current text to compare
     * @param array  $metadata   Optional metadata containing template, examples, and previous
     *                           report reference ('previous_report_page')
     * @param bool   $useContext Whether to include template and examples in the context (default: false)
     * @return string The comparison results
     */
    public function compareText($text, $metadata = [], $useContext = false)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Load the previous report and its date when a page is referenced
        $previousText = '';
        $previousDate = '';
        if (!empty($metadata['previous_report_page'])) {
            $previousContent = $this->getPageContent($metadata['previous_report_page']);
            if ($previousContent !== false) {
                $previousText = $previousContent;
            }
            $previousDate = $this->getPageDate($metadata['previous_report_page']);
        }

        // Date of the current page for the {current_date} placeholder
        $currentDate = $this->getPageDate();

        $think = $this->think ? '/think' : '/no_think';

        // loadPrompt() takes (name, language, variables); the language must be
        // passed explicitly or the variables array ends up in the page ID.
        $prompt = $this->loadPrompt('compare', $this->language, [
            'text' => $text,
            'previous_text' => $previousText,
            'current_date' => $currentDate,
            'previous_date' => $previousDate,
            'think' => $think
        ]);

        return $this->callAPI('compare', $this->language, $prompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends a custom prompt to the LLM along with the provided text.
     * The custom prompt is read from $metadata['prompt']; a missing entry is
     * treated as an empty prompt.
     *
     * @param string $text       The text to process
     * @param array  $metadata   Metadata; 'prompt' holds the custom prompt, the rest may
     *                           contain template and examples
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Guard against a missing 'prompt' entry instead of raising a notice
        $customPrompt = isset($metadata['prompt']) ? $metadata['prompt'] : '';

        // Format the prompt with the text and custom prompt
        $prompt = $customPrompt . "\n\nText to process:\n" . $text;

        return $this->callAPI('custom', $this->language, $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing:
     * get_document (requires 'id'), get_template (optional 'language') and
     * get_examples (optional 'count').
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                            ]
                        ],
                        'required' => ['id']
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'language' => [
                                'type' => 'string',
                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
                                'default' => 'ro'
                            ]
                        ]
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5
                            ]
                        ]
                    ]
                ]
            ]
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Makes an HTTP POST request to the configured API endpoint with
     * the prompt and other parameters. Handles authentication if an
     * API key is configured.
     *
     * The method constructs a conversation with system and user messages,
     * including context information from metadata when available.
     *
     * Complex logic includes:
     * 1. Loading and enhancing the system prompt with metadata context
     * 2. Building the API request with model parameters
     * 3. Handling authentication with API key if configured
     * 4. Making the HTTP request with proper error handling
     * 5. Parsing and validating the API response
     * 6. Supporting tool usage with automatic tool calling when enabled
     * 7.
Implementing context enhancement with templates, examples, and snippets
     *
     * The context information includes:
     * - Template content: Used as a starting point for the response
     * - Example pages: Full content of specified example pages
     * - Text snippets: Relevant text examples from ChromaDB
     *
     * Note: the context block is prepended to the user message; the system
     * prompt is sent as loaded.
     *
     * When tools are enabled, the method supports automatic tool calling:
     * - Tools can retrieve documents, templates, and examples as needed
     * - Tool responses are cached to avoid duplicate calls with identical parameters
     * - Infinite loop protection prevents excessive tool calls
     *
     * @param string $command    The command name for loading command-specific system prompts
     * @param string $language   Language code used to select the prompt pages
     * @param string $prompt     The prompt to send to the LLM as user message
     * @param array  $metadata   Optional metadata containing template, examples, and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @param bool   $useTools   Whether to offer the tool definitions to the LLM (default: false)
     * @return string The response content from the LLM
     * @throws \Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $language, $prompt, $metadata = [], $useContext = true, $useTools = false)
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadSystemPrompt($command, $language, []);

        // Enhance the prompt with context information from metadata
        $hasContext = !empty($metadata['template'])
            || !empty($metadata['examples'])
            || !empty($metadata['snippets']);

        if ($useContext && $hasContext) {
            $contextInfo = "\n\n<context>\n";

            // Add template content if specified in metadata
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Add example pages content if specified in metadata
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                foreach ($metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Add text snippets (from ChromaDB) if specified in metadata
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                foreach ($metadata['snippets'] as $index => $snippet) {
                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // Prepend the context block to the user prompt
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        // Prepare API request data with model parameters
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt]
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            // Follow the configured setting instead of always enabling thinking
            'think' => (bool) $this->think
        ];

        // Add tools to the request only if useTools is true
        if ($useTools) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Only send sampling parameters that are actually configured
        $samplingParameters = [
            'temperature' => $this->temperature,
            'top_p' => $this->top_p,
            'top_k' => $this->top_k,
            'min_p' => $this->min_p
        ];
        foreach ($samplingParameters as $name => $value) {
            if ($value !== null) {
                $data[$name] = $value;
            }
        }

        // Tool calls in the response are only processed when tools were offered
        return $this->callAPIWithTools($data, false, $useTools);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Processes tool calls made by the LLM and returns appropriate responses.
     * Implements caching to avoid duplicate calls with identical parameters.
     *
     * The returned message carries an extra 'cached' flag telling whether the
     * content came from the cache.
     *
     * @param array $toolCall The tool call data from the LLM ('id' and
     *                        'function' => ['name', 'arguments'])
     * @return array The tool response message
     */
    private function handleToolCall($toolCall)
    {
        $toolName = $toolCall['function']['name'];

        // Arguments normally arrive as a JSON string; accept an already
        // decoded array as well and fall back to "no arguments" on bad input.
        $rawArguments = isset($toolCall['function']['arguments']) ? $toolCall['function']['arguments'] : null;
        if (is_array($rawArguments)) {
            $arguments = $rawArguments;
        } elseif (is_string($rawArguments)) {
            $arguments = json_decode($rawArguments, true);
        } else {
            $arguments = [];
        }
        if (!is_array($arguments)) {
            $arguments = [];
        }

        // Create a cache key from the tool name and arguments
        $cacheKey = md5($toolName . serialize($arguments));

        // Serve identical calls from the cache, updating the per-call fields
        if (isset($this->toolCallCache[$cacheKey])) {
            $toolResponse = $this->toolCallCache[$cacheKey];
            $toolResponse['tool_call_id'] = $toolCall['id'];
            $toolResponse['cached'] = true;
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCall['id'],
            'cached' => false // Indicate this is a fresh response
        ];

        switch ($toolName) {
            case 'get_document':
                if (!isset($arguments['id']) || $arguments['id'] === '') {
                    $toolResponse['content'] = 'Missing required argument: id';
                    break;
                }
                $documentId = $arguments['id'];
                $content = $this->getPageContent($documentId);
                if ($content === false) {
                    $toolResponse['content'] = 'Document not found: ' . $documentId;
                } else {
                    $toolResponse['content'] = $content;
                }
                break;

            case 'get_template':
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                // Keep the count within the 1-20 range advertised to the LLM
                $count = isset($arguments['count']) ? (int) $arguments['count'] : 5;
                $count = max(1, min(20, $count));
                // Double quotes so that \n is a real line break
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        // Cache the result without the fields that change per call
        $cacheEntry = $toolResponse;
        unset($cacheEntry['tool_call_id'], $cacheEntry['cached']);
        $this->toolCallCache[$cacheKey] = $cacheEntry;

        return $toolResponse;
    }

    /**
     * Make an API call with tool responses
     *
     * Sends a follow-up request to the LLM with tool responses.
     * Implements complex logic for handling tool calls with caching and loop protection.
*
     * Complex logic includes:
     * 1. Making HTTP requests with proper authentication and error handling
     * 2. Processing tool calls from the LLM response
     * 3. Caching tool responses to avoid duplicate calls with identical parameters
     * 4. Tracking tool call counts to prevent infinite loops
     * 5. Implementing loop protection with call count limits
     * 6. Handling recursive tool calls until final content is generated
     *
     * Loop protection works by:
     * - Tracking individual tool call counts (max 3 per tool)
     * - Tracking total tool calls (max 10 total)
     * - Disabling tools when limits are exceeded to break potential loops
     *
     * Tool calls in a response are handled before its content, because some
     * backends send an empty content string together with the tool calls.
     *
     * @param array $data        The API request data including messages with tool responses
     * @param bool  $toolsCalled Whether tools are disabled for this request (used for loop protection)
     * @param bool  $useTools    Whether to process tool calls (used for loop protection)
     * @return string The final response content
     * @throws \Exception If the request fails, the HTTP status is not 200, or the
     *                    response has an unexpected format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // Set up HTTP headers, including authentication if API key is configured
        $headers = ['Content-Type: application/json'];
        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // Once tools are disabled, drop every tool-related request field
        if ($toolsCalled) {
            unset($data['tools'], $data['tool_choice'], $data['parallel_tool_calls']);
        }

        // Initialize and configure cURL for the API request
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        // Execute the API request
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Exceptions are fully qualified: this file is namespaced and does not import Exception
        if ($error) {
            throw new \Exception('API request failed: ' . $error);
        }
        if ($httpCode !== 200) {
            throw new \Exception('API request failed with HTTP code: ' . $httpCode);
        }

        // Parse the JSON response; anything without a message is unexpected
        $result = json_decode($response, true);
        if (!isset($result['choices'][0]['message']) || !is_array($result['choices'][0]['message'])) {
            throw new \Exception('Unexpected API response format');
        }
        $message = $result['choices'][0]['message'];

        // Handle tool calls first (only while tools are still enabled)
        $hasToolCalls = !empty($message['tool_calls']) && is_array($message['tool_calls']);
        if ($useTools && !$toolsCalled && $hasToolCalls) {
            // Start with original messages
            $messages = $data['messages'];

            // Echo the assistant message back, leaving out a null content field
            $assistantMessage = [];
            foreach ($message as $key => $value) {
                if ($key === 'content' && $value === null) {
                    continue;
                }
                $assistantMessage[$key] = $value;
            }
            $messages[] = $assistantMessage;

            // Process each tool call and track counts to prevent infinite loops
            foreach ($message['tool_calls'] as $toolCall) {
                $toolName = $toolCall['function']['name'];
                if (!isset($this->toolCallCounts[$toolName])) {
                    $this->toolCallCounts[$toolName] = 0;
                }
                $this->toolCallCounts[$toolName]++;

                $toolResponse = $this->handleToolCall($toolCall);
                // 'cached' is internal bookkeeping and not part of the message sent to the API
                unset($toolResponse['cached']);
                $messages[] = $toolResponse;
            }

            // Disable tools when one tool was called more than 3 times
            // or more than 10 calls were made in total
            $disableTools = max($this->toolCallCounts) > 3 || array_sum($this->toolCallCounts) > 10;

            // Make another API call with tool responses
            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $disableTools, $useTools);
        }

        // Extract the content from the response if available
        if (isset($message['content'])) {
            // Reset tool call counts when we get final content
            $this->toolCallCounts = [];
            return trim($message['content']);
        }

        throw new \Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:prompts:LANGUAGE:PROMPT_NAME
     *
     * The method implements a language fallback mechanism:
     * 1. First tries to load the prompt in the given language
     *    ('default' or an empty language means English)
     * 2. If not found, falls back to English prompts
     * 3. Throws an exception if neither is available
     *
     * After loading the prompt, it scans for placeholders and fills in the
     * ones without a usable value: 'template', 'snippets', 'examples' and
     * 'previous' are computed, every other unknown placeholder becomes an
     * empty string. All placeholders are then replaced in a single pass, so
     * braces inside substituted values are not expanded again.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param string $language   Language code of the prompt pages
     * @param array  $variables  Associative array of placeholder => value pairs;
     *                           array/object values are not substituted
     * @return string The processed prompt with placeholders replaced
     * @throws \Exception If the prompt page cannot be loaded in any language
     */
    private function loadPrompt($promptName, $language, $variables = [])
    {
        // Default to 'en' if language is 'default' or not set
        if ($language === 'default' || empty($language)) {
            $language = 'en';
        }

        // Try the requested language first
        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // If the language-specific prompt doesn't exist, try English as fallback
        if ($prompt === false && $language !== 'en') {
            $promptPageId = 'dokullm:prompts:en:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        if ($prompt === false) {
            throw new \Exception('Prompt page not found: ' . $promptPageId);
        }

        // Page references are optional; read them without raising notices
        $pageTemplate = isset($variables['page_template']) ? $variables['page_template'] : null;
        $pageExamples = isset($variables['page_examples']) ? $variables['page_examples'] : [];
        $pagePrevious = isset($variables['page_previous']) ? $variables['page_previous'] : '';

        // Add missing placeholders with appropriate values
        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // Keep values that were provided and can be substituted as text
            $provided = isset($variables[$placeholder])
                && !is_array($variables[$placeholder])
                && !is_object($variables[$placeholder]);
            if ($provided) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    $variables[$placeholder] = $this->getTemplateContent($pageTemplate);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->getSnippets(10);
                    break;

                case 'examples':
                    $variables[$placeholder] = $this->getExamplesContent($pageExamples);
                    break;

                case 'previous':
                    $variables[$placeholder] = $this->getPreviousContent($pagePrevious);

                    // Add current and previous dates as well
                    $variables['current_date'] = $this->getPageDate();
                    $variables['previous_date'] = !empty($pagePrevious) ? $this->getPageDate($pagePrevious) : '';
                    break;

                default:
                    // Unknown placeholders are replaced by an empty string
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Build the {name} => value map; arrays and objects (e.g. the list of
        // example page IDs) cannot be substituted as text and are skipped.
        $replacements = [];
        foreach ($variables as $placeholder => $value) {
            if (is_array($value) || is_object($value)) {
                continue;
            }
            $replacements['{' . $placeholder . '}'] = (string) $value;
        }

        // strtr() replaces everything in one pass over the original prompt
        return strtr($prompt, $replacements);
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the main system prompt ('system') and appends the
     * command-specific system prompt ('ACTION:system') if it can be loaded.
     *
     * @param string $action    The action/command name
     * @param string $language  Language code of the prompt pages
     * @param array  $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws \Exception If the main system prompt cannot be loaded
     */
    private function loadSystemPrompt($action, $language, $variables = [])
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadPrompt('system', $language, $variables);

        if (empty($action)) {
            return $systemPrompt;
        }

        try {
            $commandSystemPrompt = $this->loadPrompt($action . ':system', $language, $variables);
            $systemPrompt .= "\n" . $commandSystemPrompt;
        } catch (\Exception $e) {
            // The command-specific prompt is optional: keep the main system prompt
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Retrieves the raw content of a DokuWiki page by its ID.
     * Used for loading template and example page content for context.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // Convert page ID to file path
        $pageFile = wikiFN($pageId);

        if (!file_exists($pageFile) || !is_readable($pageFile)) {
            return false;
        }

        return file_get_contents($pageFile);
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Attempts to extract a date in YYmmdd format from the page ID.
* If not found, uses the file's last modification timestamp.
     *
     * Only runs of exactly six digits that form a valid calendar date are
     * accepted, so longer numbers (e.g. an eight-digit date) or arbitrary
     * counters are not misread as YYmmdd.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD), or an empty string if
     *                no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        global $ID;

        // Use provided page ID or current page ID
        $targetPageId = (string) ($pageId ?: $ID);

        // Look for YYmmdd groups that are not part of a longer digit sequence
        $pattern = '/(?<!\d)(\d{2})(\d{2})(\d{2})(?!\d)/';
        if (preg_match_all($pattern, $targetPageId, $candidates, PREG_SET_ORDER)) {
            foreach ($candidates as $candidate) {
                list(, $year, $month, $day) = $candidate;

                // Assume 20xx for years 00-69, 19xx for years 70-99
                $fullYear = (intval($year) <= 69 ? '20' : '19') . $year;

                // Take the first group that is a real date
                if (checkdate((int) $month, (int) $day, (int) $fullYear)) {
                    return $fullYear . '-' . $month . '-' . $day;
                }
            }
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            return date('Y-m-d', filemtime($pageFile));
        }

        // Return empty string if no date can be determined
        return '';
    }

    /**
     * Get current text
     *
     * Returns the text most recently stored in $currentText by one of the
     * public processing methods.
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns an array of unique placeholder names.
*
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text,
     *               in order of first appearance and indexed from 0
     */
    public function findPlaceholders($text)
    {
        if (!preg_match_all('/\{([^}]+)\}/', $text, $matches)) {
            return [];
        }

        // array_unique() keeps the original keys; reindex to get a plain list
        return array_values(array_unique($matches[1]));
    }

    /**
     * Get template content for the current text
     *
     * Convenience function to retrieve template content. If a pageId is provided,
     * retrieves content directly from that page. Otherwise (or if that page
     * cannot be read), queries ChromaDB for a relevant template based on the
     * current text.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content, or the placeholder '( no template )' if none is found
     */
    private function getTemplateContent($pageId = null)
    {
        // If a page ID is provided, use it directly
        if (!empty($pageId)) {
            $templateContent = $this->getPageContent($pageId);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        // Otherwise, get a template suggestion for the current text;
        // the first result is used as the template page
        $suggestedPages = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($suggestedPages)) {
            $templateContent = $this->getPageContent($suggestedPages[0]);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Convenience function to retrieve relevant snippets for the current text.
     * Queries ChromaDB for relevant snippets and returns them formatted.
*
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Snippets wrapped in <example id="N"> elements, or the
     *                placeholder '( no examples )' if none are found
     */
    private function getSnippets($count = 10)
    {
        // Get example snippets for the current text
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            // Double quotes so that \n is a real line break
            $formattedSnippets[] = '<example id="' . ($index + 1) . "\">\n" . $snippet . "\n</example>";
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Convenience function to retrieve content from example pages.
     * Returns the content of each readable page packed in XML elements.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Pages wrapped in <example_page source="ID"> elements, or the
     *                placeholder '( no examples )' if the list is empty or no page
     *                can be read
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content !== false) {
                // Double quotes so that \n is a real line break
                $examplesContent[] = '<example_page source="' . $exampleId . "\">\n" . $content . "\n</example_page>";
            }
        }

        // Same placeholder as for an empty list when nothing could be loaded
        if (empty($examplesContent)) {
            return '( no examples )';
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * Convenience function to retrieve content from a previous report page.
     * Returns the content of the previous page or a default message if not found.
*
     * @param string $previousId Previous page ID
     * @return string Previous report content or default message if not found
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);

        return $content !== false ? $content : '( previous report not found )';
    }

    /**
     * Get ChromaDB client with configuration
     *
     * Builds a ChromaDBClient from the plugin configuration and picks the
     * collection to query: the first ':'-separated segment of the current
     * page ID, or the configured default collection when there is no page ID,
     * the first segment is empty, or it is 'playground'.
     *
     * @return array Array containing the ChromaDB client and collection name
     */
    private function getChromaDBClient()
    {
        global $ID;

        // Get ChromaDB configuration from DokuWiki plugin configuration
        $chromaHost = $this->getConf('chroma_host', 'localhost');
        $chromaPort = $this->getConf('chroma_port', 8000);
        $chromaTenant = $this->getConf('chroma_tenant', 'dokullm');
        $chromaDatabase = $this->getConf('chroma_database', 'dokullm');
        $chromaDefaultCollection = $this->getConf('chroma_collection', 'documents');
        $ollamaHost = $this->getConf('ollama_host', 'localhost');
        $ollamaPort = $this->getConf('ollama_port', 11434);
        $ollamaModel = $this->getConf('ollama_embeddings_model', 'nomic-embed-text');

        // Start from the default and override it with the top-level segment of
        // the page ID, unless that segment is empty or the 'playground' area.
        $chromaCollection = $chromaDefaultCollection;
        if (!empty($ID)) {
            $firstSegment = explode(':', $ID)[0];
            if (!empty($firstSegment) && $firstSegment !== 'playground') {
                $chromaCollection = $firstSegment;
            }
        }

        // Create ChromaDB client with all required parameters
        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $chromaHost,
            $chromaPort,
            $chromaTenant,
            $chromaDatabase,
            $ollamaHost,
            $ollamaPort,
            $ollamaModel
        );

        return [$chromaClient, $chromaCollection];
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Passes the text to ChromaDBClient::queryCollection() for the collection
     * chosen by getChromaDBClient() and returns the IDs of the first result set.
     * NOTE(review): embedding generation presumably happens inside the client;
     * it is not visible from this method.
     *
     * Failures are logged and reported as an empty result rather than thrown.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs (empty if nothing matched or the query failed)
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // The IDs are used exactly as ChromaDB returns them, only reindexed as a list
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                return array_values($results['ids'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // Fully qualified on purpose: this file is namespaced, so a bare
            // "Exception" would resolve to dokuwiki\plugin\dokullm\Exception and
            // the catch would never match.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Generates embeddings for the input text and queries ChromaDB for similar documents.
     * Returns the actual text snippets instead of document IDs.
*
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets (empty if nothing matched or the query failed)
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Only the first result set is used, reindexed as a plain list
            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                return array_values($results['documents'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // Fully qualified on purpose: this file is namespaced, so a bare
            // "Exception" would resolve to dokuwiki\plugin\dokullm\Exception and
            // the catch would never match.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Runs queryChromaDB() for a single result restricted by the metadata
     * filter ['type' => 'template'], then strips a trailing chunk suffix
     * (e.g. "@2") from the returned ID to get the base document ID.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }

}