<?php
namespace dokuwiki\plugin\dokullm;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}

/**
 * LLM Client class for handling API communications
 *
 * Manages configuration settings and provides methods for various
 * text processing operations through an LLM API.
 * Implements caching for tool calls to avoid duplicate processing.
 */
class LlmClient
{
    /** @var string The API endpoint URL */
    private $api_url;

    /** @var array Cache for tool call results */
    private $toolCallCache = [];

    /** @var string Current text for tool usage */
    private $currentText = '';

    /** @var array Track tool call counts to prevent infinite loops */
    private $toolCallCounts = [];

    /** @var string The API authentication key */
    private $api_key;

    /** @var string The model identifier to use */
    private $model;

    /** @var int The request timeout in seconds */
    private $timeout;

    /** @var float The temperature setting for response randomness */
    private $temperature;

    /** @var float The top-p setting for nucleus sampling */
    private $top_p;

    /** @var int The top-k setting for token selection */
    private $top_k;

    /** @var float The min-p setting for minimum probability threshold */
    private $min_p;

    /** @var bool Whether to enable thinking in the LLM responses */
    private $think;

    /**
     * Declared explicitly because the constructor assigns it; without the
     * declaration it would be created as a dynamic property.
     *
     * @var string|null Language code used to select prompt templates
     */
    private $language;

    /**
     * Initialize the LLM client with configuration settings
     *
     * Retrieves configuration
values from DokuWiki's configuration system
     * for API URL, key, model, timeout, and LLM sampling parameters.
     * The values are passed in by the caller as arguments; this constructor
     * only stores them and does not read any configuration itself.
     *
     * Configuration values:
     * - api_url: The LLM API endpoint URL
     * - api_key: Authentication key for the API (optional)
     * - model: The model identifier to use for requests
     * - timeout: Request timeout in seconds
     * - language: Language code for prompt templates
     * - temperature: Temperature setting for response randomness (0.0-1.0)
     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
     * - top_k: Top-k setting (integer >= 1)
     * - min_p: Minimum probability threshold (0.0-1.0)
     * - think: Whether to enable thinking in LLM responses (boolean)
     */
    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $language = null)
    {
        $this->api_url = $api_url;
        $this->api_key = $api_key;
        $this->model = $model;
        $this->timeout = $timeout;
        $this->temperature = $temperature;
        $this->top_p = $top_p;
        $this->top_k = $top_k;
        $this->min_p = $min_p;
        $this->think = $think;
        $this->language = $language;
    }

    /**
     * Run a prompt-template based action on a text
     *
     * Loads the prompt page named after $action, fills its placeholders from
     * $metadata (extended with 'text', 'think' and 'action') and sends the
     * result to the LLM.
     *
     * The metadata keys 'template', 'examples' and 'previous' hold page IDs.
     * They are renamed to 'page_template', 'page_examples' and 'page_previous'
     * so that the {template}, {examples} and {previous} placeholders are
     * resolved to page *content* by loadPrompt() instead of being replaced
     * with the raw IDs.
     *
     * @param string $action The prompt name / command to execute
     * @param string $text The text to process
     * @param array $metadata Optional placeholder values and page references
     * @param bool $useContext Whether callAPI() may add context from metadata (default: true)
     * @return string The response content from the LLM
     * @throws \Exception If the prompt page is missing or the API request fails
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Add text, think and action to metadata
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Move page references out of the way of the same-named placeholders
        foreach (['template', 'examples', 'previous'] as $key) {
            if (isset($metadata[$key])) {
                $metadata['page_' . $key] = $metadata[$key];
                unset($metadata[$key]);
            }
        }

        // loadPrompt() takes (name, variables) and callAPI() takes
        // (command, prompt, metadata, useContext); the language is read from
        // $this->language inside those methods and must not be passed here.
        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

    /**
     * Create the provided text using the LLM
     *
     * Sends a prompt to the LLM asking it to create the given text.
     * First queries ChromaDB for relevant documents to include as examples.
     * If no template is defined, queries ChromaDB for a template.
*
     * @param string $text The text to create
     * @param array $metadata Optional metadata containing template, examples, and snippets
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @param bool $useTools Whether the LLM should fetch template and examples itself
     *                       through tool calls instead of having them looked up here (default: false)
     * @return string The created text
     * @throws \Exception If the prompt page is missing or the API request fails
     */
    public function createReport($text, $metadata = [], $useContext = true, $useTools = false)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Only look up template and snippets here when tools are disabled.
        // With tools enabled the LLM calls get_template / get_examples as needed.
        if (!$useTools) {
            // If no template is defined, try to find one using ChromaDB
            if (empty($metadata['template'])) {
                $templateResult = $this->queryChromaDBTemplate($text);
                if (!empty($templateResult)) {
                    // Use the first result as template
                    $metadata['template'] = $templateResult[0];
                }
            }

            // Query ChromaDB for relevant documents to use as examples
            $chromaResults = $this->queryChromaDBSnippets($text, 10);

            // Merge ChromaDB results with any snippets the caller supplied
            if (!empty($chromaResults)) {
                $metadata['snippets'] = array_merge(
                    isset($metadata['snippets']) ? $metadata['snippets'] : [],
                    $chromaResults
                );
            }
        }

        $think = $this->think ? '/think' : '/no_think';
        $prompt = $this->loadPrompt('create', ['text' => $text, 'think' => $think]);

        // callAPI() takes (command, prompt, metadata, useContext, useTools);
        // forward $useTools so the tool definitions are actually sent.
        return $this->callAPI('create', $prompt, $metadata, $useContext, $useTools);
    }

    /**
     * Compare two texts and highlight differences
     *
     * Sends a prompt to the LLM asking it to compare two texts and
     * highlight their similarities and differences.
*
     * @param string $text The current text to compare
     * @param array $metadata Optional metadata containing template, examples, and previous report reference
     * @param bool $useContext Whether to include template and examples in the context (default: false)
     * @return string The comparison results
     * @throws \Exception If the prompt page is missing or the API request fails
     */
    public function compareText($text, $metadata = [], $useContext = false)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Load previous report from metadata if specified
        $previousText = '';
        if (!empty($metadata['previous_report_page'])) {
            $previousText = $this->getPageContent($metadata['previous_report_page']);
            if ($previousText === false) {
                $previousText = '';
            }
        }

        // Extract dates for placeholders
        $currentDate = $this->getPageDate();
        $previousDate = !empty($metadata['previous_report_page']) ?
            $this->getPageDate($metadata['previous_report_page']) :
            '';

        $think = $this->think ? '/think' : '/no_think';
        $prompt = $this->loadPrompt('compare', [
            'text' => $text,
            'previous_text' => $previousText,
            'current_date' => $currentDate,
            'previous_date' => $previousDate,
            'think' => $think
        ]);

        // callAPI() takes (command, prompt, metadata, useContext, useTools)
        return $this->callAPI('compare', $prompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends a custom prompt to the LLM along with the provided text.
     * The custom prompt is read from $metadata['prompt']; when that key is
     * absent an empty prompt is used.
     *
     * @param string $text The text to process
     * @param array $metadata Optional metadata; 'prompt' holds the custom prompt,
     *                        other keys may contain template and examples
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     * @throws \Exception If the API request fails
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Format the prompt with the text and custom prompt
        $customPrompt = isset($metadata['prompt']) ? $metadata['prompt'] : '';
        $prompt = $customPrompt . "\n\nText to process:\n" . $text;

        // callAPI() takes (command, prompt, metadata, useContext, useTools)
        return $this->callAPI('custom', $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing:
     * get_document (required string 'id'), get_template (optional string
     * 'language') and get_examples (optional integer 'count').
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                            ]
                        ],
                        'required' => ['id']
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'language' => [
                                'type' => 'string',
                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
                                'default' => 'ro'
                            ]
                        ]
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5
                            ]
                        ]
                    ]
                ]
            ]
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Makes an HTTP POST request to the configured API endpoint with
     * the prompt and other parameters. Handles authentication if an
     * API key is configured.
     *
     * The method constructs a conversation with system and user messages,
     * including context information from metadata when available.
     *
     * Complex logic includes:
     * 1. Loading and enhancing the system prompt with metadata context
     * 2. Building the API request with model parameters
     * 3. Handling authentication with API key if configured
     * 4. Making the HTTP request with proper error handling
     * 5. Parsing and validating the API response
     * 6. Supporting tool usage with automatic tool calling when enabled
     * 7.
Implementing context enhancement with templates, examples, and snippets
     *
     * The context information includes:
     * - Template content: Used as a starting point for the response
     * - Example pages: Full content of specified example pages
     * - Text snippets: Relevant text examples from ChromaDB
     *
     * The context block is prepended to the user message; the system prompt
     * is sent as loaded.
     *
     * When tools are enabled, the method supports automatic tool calling:
     * - Tools can retrieve documents, templates, and examples as needed
     * - Tool responses are cached to avoid duplicate calls with identical parameters
     * - Infinite loop protection prevents excessive tool calls
     *
     * @param string $command The command name for loading command-specific system prompts
     * @param string $prompt The prompt to send to the LLM as user message
     * @param array $metadata Optional metadata containing template, examples, and snippets
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @param bool $useTools Whether to offer the tool definitions to the LLM (default: false)
     * @return string The response content from the LLM
     * @throws \Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
    {
        // Load system prompt which provides general instructions to the LLM.
        // loadSystemPrompt() takes (action, variables); the language is read
        // from $this->language inside loadPrompt().
        $systemPrompt = $this->loadSystemPrompt($command, []);

        // Enhance the prompt with context information from metadata
        // This provides the LLM with additional context about templates and examples
        $hasContext = !empty($metadata['template'])
            || !empty($metadata['examples'])
            || !empty($metadata['snippets']);

        if ($useContext && !empty($metadata) && $hasContext) {
            $contextInfo = "\n\n<context>\n";

            // Add template content if specified in metadata
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Add example pages content if specified in metadata
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                foreach ($metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Add text snippets if specified in metadata
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                foreach ($metadata['snippets'] as $index => $snippet) {
                    // These are text snippets from ChromaDB
                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // Prepend the context block to the user prompt
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        // Prepare API request data with model parameters
        // NOTE(review): 'think' is always sent as true, independent of
        // $this->think (which only selects /think vs /no_think in prompts).
        // Confirm whether the request flag should follow the setting.
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt]
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            'think' => true
        ];

        // Add tools to the request only if useTools is true
        if ($useTools) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Only add sampling parameters that are actually configured
        if ($this->temperature !== null) {
            $data['temperature'] = $this->temperature;
        }
        if ($this->top_p !== null) {
            $data['top_p'] = $this->top_p;
        }
        if ($this->top_k !== null) {
            $data['top_k'] = $this->top_k;
        }
        if ($this->min_p !== null) {
            $data['min_p'] = $this->min_p;
        }

        // Send the request; tool calls (if any) are resolved recursively
        return $this->callAPIWithTools($data, false);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Processes tool calls made by the LLM and returns appropriate responses.
     * Implements caching to avoid duplicate calls with identical parameters.
     * The cache key also covers the current text, because get_template and
     * get_examples derive their result from it.
     *
     * @param array $toolCall The tool call data from the LLM; reads 'id',
     *                        'function.name' and 'function.arguments'
     * @return array The tool response message with keys 'role', 'tool_call_id',
     *               'cached' and 'content'
     */
    private function handleToolCall($toolCall)
    {
        $toolName = $toolCall['function']['name'];

        // Arguments normally arrive as a JSON string; tolerate an already
        // decoded array and fall back to "no arguments" on malformed input.
        $rawArguments = isset($toolCall['function']['arguments']) ? $toolCall['function']['arguments'] : null;
        $arguments = is_string($rawArguments) ? json_decode($rawArguments, true) : $rawArguments;
        if (!is_array($arguments)) {
            $arguments = [];
        }

        // Create a cache key from the tool name, arguments and current text
        $cacheKey = md5($toolName . serialize($arguments) . "\0" . $this->currentText);

        // Check if we have a cached result for this tool call
        if (isset($this->toolCallCache[$cacheKey])) {
            $toolResponse = $this->toolCallCache[$cacheKey];
            // Update with current tool call ID
            $toolResponse['tool_call_id'] = $toolCall['id'];
            $toolResponse['cached'] = true; // Indicate this response was cached
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCall['id'],
            'cached' => false // Indicate this is a fresh response
        ];

        switch ($toolName) {
            case 'get_document':
                // NOTE(review): the ID is chosen by the model and the page is
                // read without an ACL check - confirm this is acceptable.
                $documentId = isset($arguments['id']) ? (string)$arguments['id'] : '';
                $content = $documentId !== '' ? $this->getPageContent($documentId) : false;
                if ($content === false) {
                    $toolResponse['content'] = 'Document not found: ' . $documentId;
                } else {
                    $toolResponse['content'] = $content;
                }
                break;

            case 'get_template':
                // Get template content using the convenience function
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                // Keep the count inside the 1-20 range advertised to the LLM
                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
                $count = max(1, min(20, $count));
                // Double quotes so that \n is a real line break
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        // Cache the result for future calls with the same parameters.
        // tool_call_id and the cached flag change per call and are not stored.
        $cacheEntry = $toolResponse;
        unset($cacheEntry['tool_call_id'], $cacheEntry['cached']);
        $this->toolCallCache[$cacheKey] = $cacheEntry;

        return $toolResponse;
    }

    /**
     * Make an API call with tool responses
     *
     * Sends a follow-up request to the LLM with tool responses.
     * Implements complex logic for handling tool calls with caching and loop protection.
*
     * Complex logic includes:
     * 1. Making HTTP requests with proper authentication and error handling
     * 2. Processing tool calls from the LLM response
     * 3. Caching tool responses to avoid duplicate calls with identical parameters
     * 4. Tracking tool call counts to prevent infinite loops
     * 5. Implementing loop protection with call count limits
     * 6. Handling recursive tool calls until final content is generated
     *
     * Loop protection works by:
     * - Tracking individual tool call counts (max 3 per tool)
     * - Tracking total tool calls (max 10 total)
     * - Disabling tools when limits are exceeded to break potential loops
     *
     * Tool calls in a response are handled before its content is considered,
     * so a response carrying both an empty content string and tool calls is
     * not mistaken for the final answer.
     *
     * @param array $data The API request data including messages with tool responses
     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
     * @param bool $useTools Whether to process tool calls (used for loop protection)
     * @return string The final response content
     * @throws \Exception On cURL errors, non-200 HTTP status or an unexpected response format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // Set up HTTP headers, including authentication if API key is configured
        $headers = [
            'Content-Type: application/json'
        ];

        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // Once the loop protection has tripped, stop offering tools at all.
        // parallel_tool_calls is removed with them because it is meaningless
        // without a tools list.
        if ($toolsCalled) {
            unset($data['tools'], $data['tool_choice'], $data['parallel_tool_calls']);
        }

        // Initialize and configure cURL for the API request
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        // Execute the API request
        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Exceptions are fully qualified: inside this namespace a bare
        // "Exception" would resolve to dokuwiki\plugin\dokullm\Exception.

        // Handle cURL errors
        if ($error) {
            throw new \Exception('API request failed: ' . $error);
        }

        // Handle HTTP errors
        if ($httpCode !== 200) {
            throw new \Exception('API request failed with HTTP code: ' . $httpCode);
        }

        // Parse the JSON response; anything that is not the expected
        // structure ends in the "unexpected format" exception below
        $result = json_decode($response, true);
        $message = (isset($result['choices'][0]['message']) && is_array($result['choices'][0]['message']))
            ? $result['choices'][0]['message']
            : [];

        // Handle tool calls first, unless tools were disabled for this request
        $hasToolCalls = !empty($message['tool_calls']) && is_array($message['tool_calls']);
        if ($useTools && !$toolsCalled && $hasToolCalls) {
            // Start with original messages
            $messages = $data['messages'];

            // Add assistant's message with tool calls, keeping all original
            // fields except for content
            $assistantMessage = $message;
            unset($assistantMessage['content']);
            $messages[] = $assistantMessage;

            // Process each tool call and track counts to prevent infinite loops
            foreach ($message['tool_calls'] as $toolCall) {
                $toolName = $toolCall['function']['name'];
                if (!isset($this->toolCallCounts[$toolName])) {
                    $this->toolCallCounts[$toolName] = 0;
                }
                $this->toolCallCounts[$toolName]++;

                $messages[] = $this->handleToolCall($toolCall);
            }

            // Disable tools for the follow-up request when any single tool
            // was called more than 3 times or all tools more than 10 times
            if (max($this->toolCallCounts) > 3 || array_sum($this->toolCallCounts) > 10) {
                $toolsCalled = true;
            }

            // Make another API call with tool responses
            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
        }

        // Extract the content from the response if available
        if (isset($message['content'])) {
            // Reset tool call counts when we get final content
            $this->toolCallCounts = [];
            return trim($message['content']);
        }

        // Throw exception for unexpected response format
        throw new \Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:prompts:LANGUAGE:PROMPT_NAME
     *
     * The method implements a language fallback mechanism:
     * 1. First tries to load the prompt in the configured language
     *    ('default' or an empty language means English)
     * 2. If not found, falls back to English prompts
     * 3. Throws an exception if neither is available
     *
     * After loading the prompt, it scans for placeholders and fills in the
     * ones missing from $variables:
     * - {template}: content of $variables['page_template'], or a suggested template
     * - {snippets}: formatted snippets for the current text
     * - {examples}: content of the pages listed in $variables['page_examples']
     * - {previous}: content of $variables['page_previous']; also sets
     *   'current_date' and 'previous_date'
     * - anything else: empty string
     *
     * All placeholders are then replaced in a single pass, so placeholder-like
     * text inside a substituted value is left untouched. Array values are
     * joined with line breaks; other non-scalar values are ignored.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array $variables Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws \Exception If the prompt page cannot be loaded in any language
     */
    private function loadPrompt($promptName, $variables = [])
    {
        $language = $this->language;
        // Default to 'en' if language is 'default' or not set
        if ($language === 'default' || empty($language)) {
            $language = 'en';
        }

        // Try the prompt page in the configured language first
        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // If the language-specific prompt doesn't exist, try English as fallback
        if ($prompt === false && $language !== 'en') {
            $promptPageId = 'dokullm:prompts:en:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        // If still no prompt found, throw an exception
        if ($prompt === false) {
            throw new \Exception('Prompt page not found: ' . $promptPageId);
        }

        // Add missing placeholders with appropriate values
        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // Skip if already provided in variables
            if (isset($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    // Use the referenced template page when one was given
                    $variables[$placeholder] = $this->getTemplateContent(
                        isset($variables['page_template']) ? $variables['page_template'] : null
                    );
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->getSnippets(10);
                    break;

                case 'examples':
                    // Use the referenced example pages when any were given
                    $variables[$placeholder] = $this->getExamplesContent(
                        isset($variables['page_examples']) ? $variables['page_examples'] : []
                    );
                    break;

                case 'previous':
                    // Use the referenced previous report when one was given
                    $previousId = isset($variables['page_previous']) ? $variables['page_previous'] : '';
                    $variables[$placeholder] = $this->getPreviousContent($previousId);

                    // Add current and previous dates as well
                    $variables['current_date'] = $this->getPageDate();
                    $variables['previous_date'] = !empty($previousId) ? $this->getPageDate($previousId) : '';
                    break;

                default:
                    // Unknown placeholders are replaced with nothing
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Build the {name} => value map; placeholders are in the format {placeholder_name}
        $replacements = [];
        foreach ($variables as $placeholder => $value) {
            if (is_array($value)) {
                $value = implode("\n", array_filter($value, 'is_scalar'));
            } elseif ($value !== null && !is_scalar($value)) {
                continue;
            }
            $replacements['{' . $placeholder . '}'] = (string)$value;
        }

        // Replace everything in one pass
        return empty($replacements) ? $prompt : strtr($prompt, $replacements);
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the main 'system' prompt and appends the '<action>:system' prompt
     * on a new line when such a prompt page exists. A missing command-specific
     * prompt is not an error; a missing main system prompt is.
     *
     * @param string $action The action/command name
     * @param array $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws \Exception If the main system prompt page cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadPrompt('system', $variables);

        if (empty($action)) {
            return $systemPrompt;
        }

        try {
            $systemPrompt .= "\n" . $this->loadPrompt($action . ':system', $variables);
        } catch (\Exception $e) {
            // No command-specific system prompt available:
            // the main system prompt is used on its own
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Retrieves the raw content of a DokuWiki page by its ID.
     * Used for loading template and example page content for context.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // Convert page ID to file path
        $pageFile = wikiFN($pageId);

        if (!file_exists($pageFile) || !is_readable($pageFile)) {
            return false;
        }

        return file_get_contents($pageFile);
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Attempts to extract a date in YYmmdd format from the page ID.
     * If not found, uses the file's last modification timestamp.
*
     * Only stand-alone runs of exactly six digits that form a valid calendar
     * date are accepted, so longer numbers and impossible dates in a page ID
     * fall through to the file timestamp.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD), or an empty string
     *                if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        global $ID;

        // Use provided page ID or current page ID
        $targetPageId = (string)($pageId ?: $ID);
        if ($targetPageId === '') {
            return '';
        }

        // Try to extract date from page ID (looking for YYmmdd pattern)
        if (preg_match_all('/(?<!\d)(\d{2})(\d{2})(\d{2})(?!\d)/', $targetPageId, $candidates, PREG_SET_ORDER)) {
            foreach ($candidates as $candidate) {
                // Assume 20xx for years 00-69, 19xx for years 70-99
                $year = (int)$candidate[1];
                $fullYear = ($year <= 69 ? 2000 : 1900) + $year;

                if (checkdate((int)$candidate[2], (int)$candidate[3], $fullYear)) {
                    return $fullYear . '-' . $candidate[2] . '-' . $candidate[3];
                }
            }
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            return date('Y-m-d', filemtime($pageFile));
        }

        // Return empty string if no date can be determined
        return '';
    }

    /**
     * Get current text
     *
     * Returns the text stored in $currentText by the most recent processing call.
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns an array of unique placeholder names.
*
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text
     */
    public function findPlaceholders($text)
    {
        // Nothing matched (or the match failed): no placeholders
        if (!preg_match_all('/\{([^}]+)\}/', $text, $found)) {
            return [];
        }

        // First capture group holds the names without the braces
        return array_unique($found[1]);
    }

    /**
     * Get template content for the current text
     *
     * Resolution order:
     * 1. The page given by $pageId, when it is not null and can be read
     * 2. The first template ChromaDB suggests for the current text
     * 3. The literal '( no template )'
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content, or '( no template )' if none is available
     */
    private function getTemplateContent($pageId = null)
    {
        // An explicitly requested page wins when it is readable
        if ($pageId !== null) {
            $explicit = $this->getPageContent($pageId);
            if ($explicit !== false) {
                return $explicit;
            }
        }

        // Otherwise ask for a template suggestion for the current text
        $suggestions = $this->queryChromaDBTemplate($this->currentText);
        if (empty($suggestions)) {
            return '( no template )';
        }

        $suggested = $this->getPageContent($suggestions[0]);

        return $suggested !== false ? $suggested : '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Convenience function to retrieve relevant snippets for the current text.
     * Queries ChromaDB for relevant snippets and returns them formatted.
*
     * Each snippet is wrapped in an <example id="N"> element on its own lines.
     *
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets content, or '( no examples )' if none are found
     */
    private function getSnippets($count = 10)
    {
        // Get example snippets for the current text
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            // Double quotes so that \n is a real line break
            $formattedSnippets[] = '<example id="' . ($index + 1) . "\">\n" . $snippet . "\n</example>";
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Convenience function to retrieve content from example pages.
     * Returns the content of each readable page packed in an
     * <example_page source="ID"> element; unreadable pages are skipped.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples content, or '( no examples )' when no
     *                IDs were given or none of the pages could be read
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content !== false) {
                // Double quotes so that \n is a real line break
                $examplesContent[] = '<example_page source="' . $exampleId . "\">\n" . $content . "\n</example_page>";
            }
        }

        // Same marker as for an empty ID list when nothing could be loaded
        if (empty($examplesContent)) {
            return '( no examples )';
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * Convenience function to retrieve content from a previous report page.
     * Returns the content of the previous page or a default message if not found.
*
     * @param string $previousId Previous page ID
     * @return string Previous report content or default message if not found
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);

        return $content !== false ? $content : '( previous report not found )';
    }

    /**
     * Get ChromaDB client with configuration
     *
     * Creates and returns a ChromaDB client with the appropriate configuration.
     * The collection name is the first ':'-separated part of the current page ID
     * (global $ID); the configured default collection is used when $ID is empty,
     * when that first part is empty, or when it is 'playground'.
     *
     * @return array Two-element array: the ChromaDB client, then the collection name
     */
    private function getChromaDBClient()
    {
        // Get ChromaDB configuration from DokuWiki plugin configuration
        $chromaHost = $this->getConf('chroma_host', 'localhost');
        $chromaPort = $this->getConf('chroma_port', 8000);
        $chromaTenant = $this->getConf('chroma_tenant', 'dokullm');
        $chromaDatabase = $this->getConf('chroma_database', 'dokullm');
        $chromaDefaultCollection = $this->getConf('chroma_collection', 'documents');
        $ollamaHost = $this->getConf('ollama_host', 'localhost');
        $ollamaPort = $this->getConf('ollama_port', 11434);
        $ollamaModel = $this->getConf('ollama_embeddings_model', 'nomic-embed-text');

        // Start from the default collection and override it with the first
        // part of the current page ID when that part is usable
        global $ID;
        $chromaCollection = $chromaDefaultCollection;

        if (!empty($ID)) {
            $firstPart = explode(':', $ID)[0];
            // 'playground' pages deliberately stay on the default collection
            if (!empty($firstPart) && $firstPart !== 'playground') {
                $chromaCollection = $firstPart;
            }
        }

        // Create ChromaDB client with all required parameters
        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $chromaHost,
            $chromaPort,
            $chromaTenant,
            $chromaDatabase,
            $ollamaHost,
            $ollamaPort,
            $ollamaModel
        );

        return [$chromaClient, $chromaCollection];
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Passes the input text to the ChromaDB client's queryCollection() for the
     * collection chosen by getChromaDBClient() and returns the IDs of the first
     * result set. Failures are logged and reported as an empty list.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs (empty on error or when nothing matches)
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // The ChromaDB IDs are used directly without conversion
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                return array_values($results['ids'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // The leading backslash matters: this file is namespaced, so an
            // unqualified "Exception" would name a class inside the namespace
            // and this handler would never run.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Generates embeddings for the input text and queries ChromaDB for similar documents.
     * Returns the actual text snippets instead of document IDs.
*
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets (empty on error or when nothing matches)
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Hand back the document texts of the first result set as a plain list
            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                return array_values($results['documents'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // The leading backslash matters: this file is namespaced, so an
            // unqualified "Exception" would name a class inside the namespace
            // and this handler would never run.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries ChromaDB for at most one document similar to the input text,
     * restricted by the metadata filter ['type' => 'template'].
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }

}