<?php

namespace dokuwiki\plugin\dokullm;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}

/**
 * Look up a dokullm plugin setting in DokuWiki's global configuration.
 *
 * Reads $conf['plugin']['dokullm'][$key]; a missing (or null) entry yields
 * the supplied default instead.
 *
 * @param string $key     Configuration key
 * @param mixed  $default Value returned when the key is not set
 * @return mixed The configured value, or $default
 */
function getConf($key, $default = null)
{
    global $conf;

    return $conf['plugin']['dokullm'][$key] ?? $default;
}


/**
 * LLM Client class for handling API communications
 *
 * Manages configuration settings and provides methods for various
 * text processing operations through an LLM API.
 * Implements caching for tool calls to avoid duplicate processing.
*/
class LlmClient
{
    // ---- Connection settings -------------------------------------------

    /** @var string URL of the LLM API endpoint */
    private $api_url;

    /** @var string Key sent as a Bearer token when it is not empty */
    private $api_key;

    /** @var string Identifier of the model to request */
    private $model;

    /** @var int Request timeout, in seconds */
    private $timeout;

    // ---- Sampling settings (only sent to the API when not null) --------

    /** @var float Temperature (response randomness) */
    private $temperature;

    /** @var float Top-p (nucleus sampling) */
    private $top_p;

    /** @var int Top-k (token selection) */
    private $top_k;

    /** @var float Min-p (minimum probability threshold) */
    private $min_p;

    /** @var bool Whether prompts ask the model to think ('/think' vs '/no_think') */
    private $think;

    // ---- Per-request runtime state -------------------------------------

    /** @var array Tool call results, keyed by a hash of the call */
    private $toolCallCache = [];

    /** @var string Text of the request currently being processed (used by tools) */
    private $currentText = '';

    /** @var array Number of calls made per tool name, for loop protection */
    private $toolCallCounts = [];

    /**
     * Initialize the LLM client with configuration settings
     *
     * Retrieves configuration values from DokuWiki's configuration system
     * for API URL, key, model, timeout, and LLM sampling parameters.
*
     * Every argument is optional: an argument left at null is replaced by the
     * value $this->getConf() returns for the key of the same name.
     *
     * Configuration values:
     * - api_url: The LLM API endpoint URL
     * - api_key: Authentication key for the API (optional)
     * - model: The model identifier to use for requests
     * - timeout: Request timeout in seconds
     * - language: Language code for prompt templates (read when prompts are loaded)
     * - temperature: Temperature setting for response randomness (0.0-1.0)
     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
     * - top_k: Top-k setting (integer >= 1)
     * - min_p: Minimum probability threshold (0.0-1.0)
     * - think: Whether to enable thinking in LLM responses (boolean, default false)
     */
    public function __construct(
        $api_url = null,
        $api_key = null,
        $model = null,
        $timeout = null,
        $temperature = null,
        $top_p = null,
        $top_k = null,
        $min_p = null,
        $think = null
    ) {
        // Connection
        $this->api_url = $api_url ?? $this->getConf('api_url');
        $this->api_key = $api_key ?? $this->getConf('api_key');
        $this->model   = $model ?? $this->getConf('model');
        $this->timeout = $timeout ?? $this->getConf('timeout');

        // Sampling
        $this->temperature = $temperature ?? $this->getConf('temperature');
        $this->top_p       = $top_p ?? $this->getConf('top_p');
        $this->top_k       = $top_k ?? $this->getConf('top_k');
        $this->min_p       = $min_p ?? $this->getConf('min_p');
        $this->think       = $think ?? $this->getConf('think', false);
    }

    /**
     * Run a named action on a text.
     *
     * Loads the prompt page named after the action, filling its placeholders
     * from the metadata, and sends the result to the LLM.
     *
     * Before the prompt is loaded the metadata is extended with 'text',
     * 'think' ('/think' or '/no_think') and 'action', and the keys 'template',
     * 'examples' and 'previous' are renamed to 'page_template',
     * 'page_examples' and 'page_previous'.
     *
     * @param string $action     Action name; also the prompt page name
     * @param string $text       The text to process
     * @param array  $metadata   Optional metadata (template, examples, previous, ...)
     * @param bool   $useContext Forwarded to callAPI() (default: true)
     * @return string The response content from the LLM
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Remember the text so tool handlers can work with it later
        $this->currentText = $text;

        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Page references move to a "page_" prefixed key, freeing the plain
        // names for use as prompt placeholders.
        $renames = [
            'template' => 'page_template',
            'examples' => 'page_examples',
            'previous' => 'page_previous',
        ];
        foreach ($renames as $from => $to) {
            if (isset($metadata[$from])) {
                $metadata[$to] = $metadata[$from];
                unset($metadata[$from]);
            }
        }

        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

    /**
     * Create the provided text using the LLM
     *
     * Sends a prompt to the LLM asking it to create the given text.
     * First queries ChromaDB for relevant documents to include as examples.
     * If no template is defined, queries ChromaDB for a template.
*
     * Both ChromaDB lookups are skipped when the 'use_tools' configuration is
     * enabled.
     *
     * @param string $text       The text to create
     * @param array  $metadata   Optional metadata containing template, examples, and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @return string The created text
     */
    public function createReport($text, $metadata = [], $useContext = true)
    {
        // Remember the text so tool handlers can work with it later
        $this->currentText = $text;

        // With tools enabled the LLM calls get_template / get_examples itself,
        // so the context is only pre-filled here when tools are off.
        if (!$this->getConf('use_tools', false)) {
            if (empty($metadata['template'])) {
                $candidates = $this->queryChromaDBTemplate($text);
                if (!empty($candidates)) {
                    // The first result becomes the template
                    $metadata['template'] = $candidates[0];
                }
            }

            $found = $this->queryChromaDBSnippets($text, 10);
            if (!empty($found)) {
                // Caller-supplied snippets come first, ChromaDB results after them
                $metadata['snippets'] = array_merge($metadata['snippets'] ?? [], $found);
            }
        }

        $prompt = $this->loadPrompt('create', [
            'text' => $text,
            'think' => $this->think ? '/think' : '/no_think',
        ]);

        return $this->callAPI('create', $prompt, $metadata, $useContext);
    }

    /**
     * Compare two texts and highlight differences
     *
     * Sends a prompt to the LLM asking it to compare two texts and
     * highlight their similarities and differences.
*
     * The previous text is read from the page named in
     * $metadata['previous_report_page']; it is an empty string when that key is
     * empty or the page cannot be read.
     *
     * @param string $text       The current text to compare
     * @param array  $metadata   Optional metadata containing template, examples, and previous report reference
     * @param bool   $useContext Whether to include template and examples in the context (default: false)
     * @return string The comparison results
     */
    public function compareText($text, $metadata = [], $useContext = false)
    {
        // Remember the text so tool handlers can work with it later
        $this->currentText = $text;

        $hasPrevious = !empty($metadata['previous_report_page']);

        $previousText = '';
        if ($hasPrevious) {
            $loaded = $this->getPageContent($metadata['previous_report_page']);
            $previousText = ($loaded === false) ? '' : $loaded;
        }

        $prompt = $this->loadPrompt('compare', [
            'text' => $text,
            'previous_text' => $previousText,
            'current_date' => $this->getPageDate(),
            'previous_date' => $hasPrevious ? $this->getPageDate($metadata['previous_report_page']) : '',
            'think' => $this->think ? '/think' : '/no_think',
        ]);

        return $this->callAPI('compare', $prompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends a custom prompt to the LLM along with the provided text. The custom
     * prompt is taken from $metadata['prompt']; when that key is absent the
     * text is sent with an empty instruction instead of raising an
     * undefined-index warning.
     *
     * @param string $text       The text to process
     * @param array  $metadata   Metadata; 'prompt' holds the custom prompt, and
     *                           template/examples/snippets may be present too
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Remember the text so tool handlers can work with it later
        $this->currentText = $text;

        $customPrompt = (string) ($metadata['prompt'] ?? '');

        // Format the prompt with the custom prompt followed by the text
        $prompt = $customPrompt . "\n\nText to process:\n" . $text;

        return $this->callAPI('custom', $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing:
     * get_document (requires 'id'), get_template (optional 'language') and
     * get_examples (optional 'count').
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        return [
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_document',
                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'id' => [
                                'type' => 'string',
                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                            ]
                        ],
                        'required' => ['id']
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_template',
                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'language' => [
                                'type' => 'string',
                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
                                'default' => 'ro'
                            ]
                        ]
                    ]
                ]
            ],
            [
                'type' => 'function',
                'function' => [
                    'name' => 'get_examples',
                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                    'parameters' => [
                        'type' => 'object',
                        'properties' => [
                            'count' => [
                                'type' => 'integer',
                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                                'default' => 5
                            ]
                        ]
                    ]
                ]
            ]
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Makes an HTTP POST request to the configured API endpoint with
     * the prompt and other parameters. Handles authentication if an
     * API key is configured.
     *
     * The method constructs a conversation with system and user messages,
     * including context information from metadata when available.
     *
     * Complex logic includes:
     * 1. Loading and enhancing the system prompt with metadata context
     * 2. Building the API request with model parameters
     * 3. Handling authentication with API key if configured
     * 4. Making the HTTP request with proper error handling
     * 5. Parsing and validating the API response
     * 6. Supporting tool usage with automatic tool calling when enabled
     * 7.
*    Implementing context enhancement with templates, examples, and snippets
     *
     * The context information includes:
     * - Template content: Used as a starting point for the response
     * - Example pages: Full content of specified example pages
     * - Text snippets: Relevant text examples from ChromaDB
     *
     * The assembled <context> block is prepended to the user prompt; the system
     * prompt is sent as loaded.
     *
     * When the 'use_tools' configuration is enabled, the tool definitions are
     * attached to the request and the follow-up handling is done by
     * callAPIWithTools():
     * - Tools can retrieve documents, templates, and examples as needed
     * - Tool responses are cached to avoid duplicate calls with identical parameters
     * - Infinite loop protection prevents excessive tool calls
     *
     * @param string $command    The command name for loading command-specific system prompts
     * @param string $prompt     The prompt to send to the LLM as user message
     * @param array  $metadata   Optional metadata containing template, examples, and snippets
     * @param bool   $useContext Whether to include template and examples in the context (default: true)
     * @return string The response content from the LLM
     * @throws \Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true)
    {
        // Every top-level request starts with fresh loop-protection counters, so
        // a previous request that ended in an exception cannot leave stale counts.
        $this->toolCallCounts = [];

        // Load system prompt which provides general instructions to the LLM
        $systemPrompt = $this->loadSystemPrompt($command, []);

        $hasContext = !empty($metadata['template'])
            || !empty($metadata['examples'])
            || !empty($metadata['snippets']);

        if ($useContext && $hasContext) {
            $contextInfo = "\n\n<context>\n";

            // Template page content, when a template page is named
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Full content of the example pages; a single page ID given as a
            // string is accepted as a one-element list.
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                foreach ((array) $metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Text snippets (from ChromaDB or the caller), numbered from 1.
            // array_values() guarantees integer positions for the numbering.
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                foreach (array_values((array) $metadata['snippets']) as $index => $snippet) {
                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // Prepend the context block to the user prompt
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        // Check if tools should be used based on configuration
        $useTools = $this->getConf('use_tools', false);

        // NOTE(review): 'think' is always sent as true, independent of
        // $this->think (which only selects '/think' vs '/no_think' in prompts).
        // Confirm this is intended for the target API.
        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt]
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            'think' => true
        ];

        if ($useTools) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Sampling parameters are only sent when configured
        foreach (['temperature', 'top_p', 'top_k', 'min_p'] as $param) {
            if ($this->$param !== null) {
                $data[$param] = $this->$param;
            }
        }

        return $this->callAPIWithTools($data, false);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Processes one tool call made by the LLM and returns the matching
     * 'tool' role message. Results are cached per tool name, arguments and
     * current text, so a repeated identical call within the same request text
     * is answered from the cache (flagged with 'cached' => true).
     *
     * Tool arguments come from the model and are treated as untrusted: missing
     * or malformed arguments fall back to defaults, and the example count is
     * clamped to the advertised 1-20 range.
     *
     * @param array $toolCall The tool call data from the LLM
     * @return array The tool response message (role, tool_call_id, cached, content)
     */
    private function handleToolCall($toolCall)
    {
        $toolName = $toolCall['function']['name'];
        $toolCallId = $toolCall['id'] ?? null;

        // Arguments normally arrive as a JSON string; an already decoded array
        // is accepted too, and anything undecodable becomes "no arguments".
        $rawArguments = $toolCall['function']['arguments'] ?? null;
        $arguments = is_array($rawArguments) ? $rawArguments : json_decode((string) $rawArguments, true);
        if (!is_array($arguments)) {
            $arguments = [];
        }

        // The current text is part of the key because get_template and
        // get_examples depend on it; without it a later request on the same
        // instance would receive results computed for an earlier text.
        $cacheKey = md5($toolName . serialize($arguments) . "\0" . $this->currentText);

        if (isset($this->toolCallCache[$cacheKey])) {
            $toolResponse = $this->toolCallCache[$cacheKey];
            // The call ID differs per call, so it is filled in on every hit
            $toolResponse['tool_call_id'] = $toolCallId;
            $toolResponse['cached'] = true;
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCallId,
            'cached' => false
        ];

        switch ($toolName) {
            case 'get_document':
                // NOTE(review): the page ID is chosen by the model and no ACL
                // check is visible here - confirm read permissions are enforced.
                $documentId = isset($arguments['id']) && is_scalar($arguments['id'])
                    ? (string) $arguments['id']
                    : '';
                $content = ($documentId === '') ? false : $this->getPageContent($documentId);
                $toolResponse['content'] = ($content === false)
                    ? 'Document not found: ' . $documentId
                    : $content;
                break;

            case 'get_template':
                // The 'language' argument is currently not used
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                $count = isset($arguments['count']) ? (int) $arguments['count'] : 5;
                $count = max(1, min(20, $count));
                // Double quotes so "\n" is a real line break, not a literal backslash-n
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        // Cache without the per-call fields
        $cacheEntry = $toolResponse;
        unset($cacheEntry['tool_call_id'], $cacheEntry['cached']);
        $this->toolCallCache[$cacheKey] = $cacheEntry;

        return $toolResponse;
    }

    /**
     * Make an API call with tool responses
     *
     * Sends a follow-up request to the LLM with tool responses.
     * Implements complex logic for handling tool calls with caching and loop protection.
*
     * Complex logic includes:
     * 1. Making HTTP requests with proper authentication and error handling
     * 2. Processing tool calls from the LLM response
     * 3. Caching tool responses to avoid duplicate calls with identical parameters
     * 4. Tracking tool call counts to prevent infinite loops
     * 5. Implementing loop protection with call count limits
     * 6. Handling recursive tool calls until final content is generated
     *
     * Loop protection works by:
     * - Tracking individual tool call counts (max 3 per tool)
     * - Tracking total tool calls (max 10 total)
     * - Disabling tools when limits are exceeded to break potential loops
     * - Refusing to process further tool calls once tools are disabled
     *
     * Tool calls are examined before the message content, because a response
     * may carry an empty-string content next to its tool calls.
     *
     * @param array $data        The API request data including messages with tool responses
     * @param bool  $toolsCalled Whether tools have been disabled for this request (loop protection)
     * @param bool  $useTools    Whether to process tool calls at all
     * @return string The final response content
     * @throws \Exception If the request cannot be encoded or sent, the HTTP status
     *                    is not 200, or the response has an unexpected format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        $headers = ['Content-Type: application/json'];
        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // Once tools are disabled, every tool-related request field is removed;
        // 'parallel_tool_calls' without 'tools' is rejected by some APIs.
        if ($toolsCalled) {
            unset($data['tools'], $data['tool_choice'], $data['parallel_tool_calls']);
        }

        $payload = json_encode($data);
        if ($payload === false) {
            // e.g. invalid UTF-8 in page content
            throw new \Exception('API request failed: ' . json_last_error_msg());
        }

        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $this->api_url,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $payload,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => (int) $this->timeout,
            CURLOPT_SSL_VERIFYPEER => true,
        ]);

        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // The exception class is fully qualified: inside this namespace a bare
        // "Exception" would resolve to a class that does not exist.
        if ($response === false || $error) {
            throw new \Exception('API request failed: ' . $error);
        }

        if ($httpCode !== 200) {
            throw new \Exception('API request failed with HTTP code: ' . $httpCode);
        }

        $result = json_decode($response, true);
        $message = $result['choices'][0]['message'] ?? null;

        if (is_array($message)) {
            $toolCalls = $message['tool_calls'] ?? null;

            if ($useTools && !$toolsCalled && is_array($toolCalls) && !empty($toolCalls)) {
                $messages = $data['messages'];

                // Echo the assistant message back, dropping only an empty content
                $assistantMessage = $message;
                if (!isset($assistantMessage['content']) || $assistantMessage['content'] === '') {
                    unset($assistantMessage['content']);
                }
                $messages[] = $assistantMessage;

                foreach ($toolCalls as $toolCall) {
                    $toolName = $toolCall['function']['name'];
                    $this->toolCallCounts[$toolName] = ($this->toolCallCounts[$toolName] ?? 0) + 1;

                    $toolResponse = $this->handleToolCall($toolCall);
                    // 'cached' is internal bookkeeping, not part of the message sent to the API
                    unset($toolResponse['cached']);
                    $messages[] = $toolResponse;
                }

                // More than 3 calls of one tool, or more than 10 calls in total,
                // disables tools for the follow-up request.
                if (max($this->toolCallCounts) > 3 || array_sum($this->toolCallCounts) > 10) {
                    $toolsCalled = true;
                }

                $data['messages'] = $messages;
                return $this->callAPIWithTools($data, $toolsCalled, $useTools);
            }

            if (isset($message['content'])) {
                // Final answer: reset the loop-protection counters
                $this->toolCallCounts = [];
                return trim($message['content']);
            }
        }

        $this->toolCallCounts = [];
        throw new \Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:prompts:LANGUAGE:PROMPT_NAME
     *
     * The method implements a language fallback mechanism:
     * 1. First tries to load the prompt in the configured language
     *    ('default' or an empty setting means English)
     * 2. If not found, falls back to English prompts
     * 3. Throws an exception if neither is available
     *
     * After loading the prompt, it scans for placeholders and fills in the ones
     * missing from $variables: 'template', 'snippets', 'examples' and 'previous'
     * are computed (using page_template, page_examples and page_previous when
     * present), every other unknown placeholder becomes an empty string.
     *
     * All placeholders are replaced in a single pass, so placeholder-like text
     * inside a substituted value (for example in the user's text) is never
     * expanded again. Variables whose value is not a scalar or null are not
     * substituted.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array  $variables  Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws \Exception If the prompt page cannot be loaded in any language
     */
    private function loadPrompt($promptName, $variables = [])
    {
        $language = $this->getConf('language');
        if ($language === 'default' || empty($language)) {
            $language = 'en';
        }

        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // English fallback
        if ($prompt === false && $language !== 'en') {
            $promptPageId = 'dokullm:prompts:en:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        if ($prompt === false) {
            throw new \Exception('Prompt page not found: ' . $promptPageId);
        }

        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // Values supplied by the caller win
            if (isset($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template'] ?? null);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->getSnippets(10);
                    break;

                case 'examples':
                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples'] ?? []);
                    break;

                case 'previous':
                    $previousPage = $variables['page_previous'] ?? '';
                    $variables[$placeholder] = $this->getPreviousContent($previousPage);

                    // The dates of both reports accompany the previous content
                    $variables['current_date'] = $this->getPageDate();
                    $variables['previous_date'] = !empty($previousPage)
                        ? $this->getPageDate($previousPage)
                        : '';
                    break;

                default:
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Placeholders are in the format {placeholder_name}
        $replacements = [];
        foreach ($variables as $name => $value) {
            if ($value === null || is_scalar($value)) {
                $replacements['{' . $name . '}'] = (string) $value;
            }
        }

        return strtr($prompt, $replacements);
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the main 'system' prompt and, when an action is given, appends the
     * prompt named ACTION:system on a new line. A missing or unloadable
     * command-specific prompt is ignored on purpose, so the main system prompt
     * is still used.
     *
     * @param string $action    The action/command name
     * @param array  $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws \Exception If the main system prompt cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        $systemPrompt = $this->loadPrompt('system', $variables);

        if (!empty($action)) {
            try {
                $systemPrompt .= "\n" . $this->loadPrompt($action . ':system', $variables);
            } catch (\Exception $e) {
                // Best effort: no command-specific system prompt available
            }
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Resolves the page ID to its file with wikiFN() and returns the raw file
     * content. Used for loading prompt, template and example pages.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        $pageFile = wikiFN($pageId);

        if (!file_exists($pageFile) || !is_readable($pageFile)) {
            return false;
        }

        return file_get_contents($pageFile);
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Attempts to extract a date in YYmmdd format from the page ID.
     * If not found, uses the file's last modification timestamp.
*
     * The first run of six digits in the page ID is read as YYmmdd; years
     * 00-69 are taken as 20xx and 70-99 as 19xx. Digits that do not form a
     * real calendar date (for example month 24) are ignored and the file
     * timestamp is used instead.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD), or '' if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        global $ID;

        // Use provided page ID or current page ID
        $targetPageId = (string) ($pageId ?: $ID);
        if ($targetPageId === '') {
            return '';
        }

        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
            $year = $matches[1];
            $month = $matches[2];
            $day = $matches[3];

            // Assume 20xx for years 00-69, 19xx for years 70-99
            $fullYear = (intval($year) <= 69 ? '20' : '19') . $year;

            // Only accept digits that form a real date
            if (checkdate((int) $month, (int) $day, (int) $fullYear)) {
                return $fullYear . '-' . $month . '-' . $day;
            }
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            $timestamp = filemtime($pageFile);
            if ($timestamp !== false) {
                return date('Y-m-d', $timestamp);
            }
        }

        return '';
    }

    /**
     * Get current text
     *
     * Returns the text stored by the most recent public processing call.
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns an array of unique placeholder names.
*
     * A placeholder name is any non-empty run of characters other than '}'
     * between '{' and '}'. Keys of the returned array are the positions of the
     * first occurrences and may therefore have gaps.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text
     */
    public function findPlaceholders($text)
    {
        if (!preg_match_all('/\{([^}]+)\}/', $text, $found)) {
            return [];
        }

        return array_unique($found[1]);
    }

    /**
     * Get template content for the current text
     *
     * Convenience function to retrieve template content. If a pageId is provided,
     * retrieves content directly from that page. If no pageId is given, or that
     * page cannot be read, ChromaDB is queried for a template matching the
     * current text and the first suggested page is used.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content, or '( no template )' if none could be loaded
     */
    private function getTemplateContent($pageId = null)
    {
        // Explicit page first
        if ($pageId !== null) {
            $direct = $this->getPageContent($pageId);
            if ($direct !== false) {
                return $direct;
            }
        }

        // Then the suggestion for the current text
        $suggested = $this->queryChromaDBTemplate($this->currentText);
        if (!empty($suggested)) {
            $fromSuggestion = $this->getPageContent($suggested[0]);
            if ($fromSuggestion !== false) {
                return $fromSuggestion;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Convenience function to retrieve relevant snippets for the current text.
     * Queries ChromaDB for relevant snippets and returns them formatted.
*
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets content, or the literal '( no examples )' when no snippets are found
     */
    private function getSnippets($count = 10)
    {
        // Get example snippets for the current text
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            // "\n" must be double-quoted: in a single-quoted string it would be
            // emitted as a literal backslash followed by "n" instead of a newline.
            $formattedSnippets[] = '<example id="' . ($index + 1) . '">' . "\n" . $snippet . "\n" . '</example>';
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Convenience function to retrieve content from example pages.
     * Returns the content of each page packed in XML elements.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples content; the literal '( no examples )' when no
     *                IDs are given, or an empty string when none of the pages can be loaded
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            // Pages for which getPageContent() returns false are skipped
            if ($content !== false) {
                // Double-quoted "\n" so real newlines separate the tags from the content
                $examplesContent[] = '<example_page source="' . $exampleId . '">' . "\n" . $content . "\n" . '</example_page>';
            }
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * Convenience function to retrieve content from a previous report page.
     * Returns the content of the previous page or a default message if not found.
*
     * @param string $previousId Previous page ID
     * @return string Previous report content, '( no previous report )' when no ID is given,
     *                or '( previous report not found )' when the page cannot be loaded
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);

        // getPageContent() is treated as failed only when it returns false
        return $content !== false ? $content : '( previous report not found )';
    }

    /**
     * Get ChromaDB client with configuration
     *
     * Creates and returns a ChromaDB client built from the plugin configuration.
     * The collection name is the first ':'-separated segment of the current page ID;
     * the configured default collection is used when there is no current page, when
     * that segment is empty, or when it is 'playground'.
     *
     * @return array Two-element array: the ChromaDB client and the collection name
     */
    private function getChromaDBClient()
    {
        // Get ChromaDB configuration from DokuWiki plugin configuration
        $chromaHost = $this->getConf('chroma_host', 'localhost');
        $chromaPort = $this->getConf('chroma_port', 8000);
        $chromaTenant = $this->getConf('chroma_tenant', 'dokullm');
        $chromaDatabase = $this->getConf('chroma_database', 'dokullm');
        $chromaDefaultCollection = $this->getConf('chroma_collection', 'documents');
        $ollamaHost = $this->getConf('ollama_host', 'localhost');
        $ollamaPort = $this->getConf('ollama_port', 11434);
        $ollamaModel = $this->getConf('ollama_embeddings_model', 'nomic-embed-text');

        // Use the first part of the current page ID as collection name, fallback to default
        global $ID;
        $chromaCollection = $chromaDefaultCollection;

        if (!empty($ID)) {
            $firstSegment = explode(':', $ID)[0];
            // 'playground' pages deliberately share the default collection
            if (!empty($firstSegment) && $firstSegment !== 'playground') {
                $chromaCollection = $firstSegment;
            }
        }

        // Create ChromaDB client with all required parameters
        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $chromaHost,
            $chromaPort,
            $chromaTenant,
            $chromaDatabase,
            $ollamaHost,
            $ollamaPort,
            $ollamaModel
        );

        return [$chromaClient, $chromaCollection];
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Passes the text to the ChromaDB client's queryCollection() for the collection
     * chosen by getChromaDBClient() and collects the IDs of the first result set.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs; empty when nothing matches or the query throws
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Extract document IDs from results
            $documentIds = [];
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                foreach ($results['ids'][0] as $id) {
                    // Use the ChromaDB ID directly without conversion
                    $documentIds[] = $id;
                }
            }

            return $documentIds;
        } catch (\Exception $e) {
            // The leading backslash is required: inside this namespace an unqualified
            // "Exception" names a class in the namespace, so the catch would never match.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Generates embeddings for the input text and queries ChromaDB for similar documents.
* Returns the actual text snippets instead of document IDs.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets; empty when nothing matches or the query throws
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Extract document texts from the first result set
            $snippets = [];
            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                foreach ($results['documents'][0] as $document) {
                    $snippets[] = $document;
                }
            }

            return $snippets;
        } catch (\Exception $e) {
            // The leading backslash is required: inside this namespace an unqualified
            // "Exception" names a class in the namespace, so the catch would never match.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Queries ChromaDB for at most one document matching the input text, restricted
     * by the metadata filter 'type' => 'template'. A trailing chunk suffix such as
     * "@2" is stripped from the returned ID to obtain the base document ID.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }

}