<?php

namespace dokuwiki\plugin\dokullm;

// Imported explicitly: inside this namespace an unqualified `Exception`
// resolves to dokuwiki\plugin\dokullm\Exception, not the built-in class.
use Exception;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}

/**
 * LLM Client class for handling API communications
 *
 * Manages configuration settings and provides methods for various
 * text processing operations through an LLM API.
 * Implements caching for tool calls to avoid duplicate processing.
 */
class LlmClient
{
    // --- Connection settings -------------------------------------------

    /** @var string The API endpoint URL */
    private $api_url;

    /** @var string The API authentication key */
    private $api_key;

    /** @var string The model identifier to use */
    private $model;

    /** @var int The request timeout in seconds */
    private $timeout;

    // --- Sampling settings (left out of the request when null) ---------

    /** @var float The temperature setting for response randomness */
    private $temperature;

    /** @var float The top-p setting for nucleus sampling */
    private $top_p;

    /** @var int The top-k setting for token selection */
    private $top_k;

    /** @var float The min-p setting for minimum probability threshold */
    private $min_p;

    /** @var bool Whether to enable thinking in the LLM responses */
    private $think;

    // --- Per-request state ---------------------------------------------

    /** @var array Cache for tool call results, keyed by a hash of tool name and arguments */
    private $toolCallCache = [];

    /** @var string Current text for tool usage */
    private $currentText = '';

    /** @var array Track tool call counts (tool name => count) to prevent infinite loops */
    private $toolCallCounts = [];

    /**
     * Initialize the LLM client with configuration settings
     *
     * Retrieves configuration
values from DokuWiki's configuration system
     * for API URL, key, model, timeout, and LLM sampling parameters.
     * Every argument that is passed as non-null takes precedence over the
     * corresponding plugin setting.
     *
     * Configuration values:
     * - api_url: The LLM API endpoint URL
     * - api_key: Authentication key for the API (optional)
     * - model: The model identifier to use for requests
     * - timeout: Request timeout in seconds
     * - temperature: Temperature setting for response randomness (0.0-1.0)
     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
     * - top_k: Top-k setting (integer >= 1)
     * - min_p: Minimum probability threshold (0.0-1.0)
     * - think: Whether to enable thinking in LLM responses (boolean, defaults to false)
     *
     * @param string|null $api_url Override for the API endpoint URL
     * @param string|null $api_key Override for the API key
     * @param string|null $model Override for the model identifier
     * @param int|null $timeout Override for the request timeout in seconds
     * @param float|null $temperature Override for the temperature
     * @param float|null $top_p Override for top-p
     * @param int|null $top_k Override for top-k
     * @param float|null $min_p Override for min-p
     * @param bool|null $think Override for the thinking switch
     */
    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null)
    {
        // Property name doubles as the configuration key for these settings.
        $overrides = [
            'api_url' => $api_url,
            'api_key' => $api_key,
            'model' => $model,
            'timeout' => $timeout,
            'temperature' => $temperature,
            'top_p' => $top_p,
            'top_k' => $top_k,
            'min_p' => $min_p,
        ];
        foreach ($overrides as $setting => $override) {
            $this->$setting = $override ?? $this->getConf($setting);
        }

        // 'think' is the only setting with a non-null fallback.
        $this->think = $think ?? $this->getConf('think', false);
    }

    /**
     * Get configuration value for the dokullm plugin
     *
     * Reads $conf['plugin']['dokullm'][$key] from the global DokuWiki
     * configuration array.
     *
     * @param string $key Configuration key
     * @param mixed $default Value returned when the key is missing or null
     * @return mixed Configuration value
     */
    private function getConf($key, $default = null)
    {
        global $conf;

        return $conf['plugin']['dokullm'][$key] ?? $default;
    }

    /**
     * Run a prompt-template based action on a text
     *
     * Loads the prompt template named after $action, fills it from the
     * metadata and sends it to the LLM.
     *
     * Before the template is loaded the metadata is extended with 'text',
     * 'think' ('/think' or '/no_think') and 'action', and the keys
     * 'template', 'examples' and 'previous' are renamed to 'page_template',
     * 'page_examples' and 'page_previous'.
     *
     * @param string $action The action name, also used as prompt template name
     * @param string $text The text to process
     * @param array $metadata Optional metadata (template, examples, previous, ...)
     * @param bool $useContext Passed through to the API call (default: true)
     * @return string The response content from the LLM
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Remembered so that tool handlers can query for the same text later.
        $this->currentText = $text;

        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Page references move to a 'page_' prefixed key.
        foreach (['template', 'examples', 'previous'] as $pageKey) {
            if (isset($metadata[$pageKey])) {
                $metadata['page_' . $pageKey] = $metadata[$pageKey];
                unset($metadata[$pageKey]);
            }
        }

        $userPrompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $userPrompt, $metadata, $useContext);
    }

    /**
     * Create the provided text using the LLM
     *
     * Sends a prompt to the LLM asking it to create the given text.
     * First queries ChromaDB for relevant documents to include as examples.
     * If no template is defined, queries ChromaDB for a template.
Both lookups are skipped when the 'use_tools' setting is enabled.
     *
     * @param string $text The text to create
     * @param array $metadata Optional metadata containing template, examples, and snippets
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @return string The created text
     */
    public function createReport($text, $metadata = [], $useContext = true)
    {
        // Remembered so that tool handlers can query for the same text later.
        $this->currentText = $text;

        // With tools enabled the LLM is expected to call get_template and
        // get_examples itself, so nothing is pre-fetched in that case.
        if (!$this->getConf('use_tools', false)) {
            if (empty($metadata['template'])) {
                $templateCandidates = $this->queryChromaDBTemplate($text);
                if (!empty($templateCandidates)) {
                    // The first result is used as template.
                    $metadata['template'] = $templateCandidates[0];
                }
            }

            $similarSnippets = $this->queryChromaDBSnippets($text, 10);
            if (!empty($similarSnippets)) {
                // Caller-supplied snippets come first, ChromaDB results after.
                $metadata['snippets'] = array_merge($metadata['snippets'] ?? [], $similarSnippets);
            }
        }

        $promptVariables = [
            'text' => $text,
            'think' => $this->think ? '/think' : '/no_think',
        ];
        $userPrompt = $this->loadPrompt('create', $promptVariables);

        return $this->callAPI('create', $userPrompt, $metadata, $useContext);
    }

    /**
     * Compare two texts and highlight differences
     *
     * Sends a prompt to the LLM asking it to compare two texts and
     * highlight their similarities and differences.
The previous text is read from the page named in $metadata['previous_report_page'].
     *
     * @param string $text The current text to compare
     * @param array $metadata Optional metadata containing template, examples, and previous report reference
     * @param bool $useContext Whether to include template and examples in the context (default: false)
     * @return string The comparison results
     */
    public function compareText($text, $metadata = [], $useContext = false)
    {
        // Remembered so that tool handlers can query for the same text later.
        $this->currentText = $text;

        $previousText = '';
        $previousDate = '';
        if (!empty($metadata['previous_report_page'])) {
            $previousPage = $metadata['previous_report_page'];

            // An unreadable previous page is treated as empty text.
            $loaded = $this->getPageContent($previousPage);
            if ($loaded !== false) {
                $previousText = $loaded;
            }
            $previousDate = $this->getPageDate($previousPage);
        }

        $userPrompt = $this->loadPrompt('compare', [
            'text' => $text,
            'previous_text' => $previousText,
            'current_date' => $this->getPageDate(),
            'previous_date' => $previousDate,
            'think' => $this->think ? '/think' : '/no_think',
        ]);

        return $this->callAPI('compare', $userPrompt, $metadata, $useContext);
    }

    /**
     * Process text with a custom user prompt
     *
     * Sends the custom prompt found in $metadata['prompt'] to the LLM,
     * followed by the provided text. A missing prompt is treated as an
     * empty string instead of raising an undefined-index warning.
     *
     * @param string $text The text to process
     * @param array $metadata Metadata; 'prompt' holds the custom prompt, other keys may carry template and examples
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @return string The processed text
     */
    public function processCustomPrompt($text, $metadata = [], $useContext = true)
    {
        // Remembered so that tool handlers can query for the same text later.
        $this->currentText = $text;

        $customPrompt = $metadata['prompt'] ?? '';
        $prompt = $customPrompt . "\n\nText to process:\n" . $text;

        return $this->callAPI('custom', $prompt, $metadata, $useContext);
    }

    /**
     * Get the list of available tools for the LLM
     *
     * Defines the tools that can be used by the LLM during processing:
     * get_document, get_template and get_examples.
     *
     * @return array List of tool definitions
     */
    private function getAvailableTools()
    {
        // Wraps a name/description/parameter schema into a function-tool entry.
        $tool = static function ($name, $description, array $parameters) {
            return [
                'type' => 'function',
                'function' => [
                    'name' => $name,
                    'description' => $description,
                    'parameters' => $parameters,
                ],
            ];
        };

        return [
            $tool(
                'get_document',
                'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                [
                    'type' => 'object',
                    'properties' => [
                        'id' => [
                            'type' => 'string',
                            'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.',
                        ],
                    ],
                    'required' => ['id'],
                ]
            ),
            $tool(
                'get_template',
                'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                [
                    'type' => 'object',
                    'properties' => [
                        'language' => [
                            'type' => 'string',
                            'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
                            'default' => 'ro',
                        ],
                    ],
                ]
            ),
            $tool(
                'get_examples',
                'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                [
                    'type' => 'object',
                    'properties' => [
                        'count' => [
                            'type' => 'integer',
                            'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                            'default' => 5,
                        ],
                    ],
                ]
            ),
        ];
    }

    /**
     * Call the LLM API with the specified prompt
     *
     * Makes an HTTP POST request to the configured API endpoint with
     * the prompt and other parameters. Handles authentication if an
     * API key is configured.
     *
     * The method constructs a conversation with system and user messages,
     * including context information from metadata when available.
     *
     * Complex logic includes:
     * 1. Loading the system prompt and placing metadata context in front of the user prompt
     * 2. Building the API request with model parameters
     * 3. Handling authentication with API key if configured
     * 4. Making the HTTP request with proper error handling
     * 5. Parsing and validating the API response
     * 6. Supporting tool usage with automatic tool calling when enabled
     * 7.
Implementing context enhancement with templates, examples, and snippets
     *
     * The context information includes:
     * - Template content: Used as a starting point for the response
     * - Example pages: Full content of specified example pages
     * - Text snippets: Relevant text examples from ChromaDB
     *
     * The context is wrapped in a <context> element and placed in front of
     * the user prompt; the system prompt is sent unchanged.
     *
     * When tools are enabled, the method supports automatic tool calling:
     * - Tools can retrieve documents, templates, and examples as needed
     * - Tool responses are cached to avoid duplicate calls with identical parameters
     * - Infinite loop protection prevents excessive tool calls
     *
     * @param string $command The command name for loading command-specific system prompts
     * @param string $prompt The prompt to send to the LLM as user message
     * @param array $metadata Optional metadata containing template, examples, and snippets
     * @param bool $useContext Whether to include template and examples in the context (default: true)
     * @return string The response content from the LLM
     * @throws \Exception If the API request fails or returns unexpected format
     */
    private function callAPI($command, $prompt, $metadata = [], $useContext = true)
    {
        // System prompt: general instructions plus optional command-specific part.
        $systemPrompt = $this->loadSystemPrompt($command, []);

        $hasContext = !empty($metadata['template'])
            || !empty($metadata['examples'])
            || !empty($metadata['snippets']);

        if ($useContext && $hasContext) {
            $contextInfo = "\n\n<context>\n";

            // Template page, skipped silently when the page cannot be read.
            if (!empty($metadata['template'])) {
                $templateContent = $this->getPageContent($metadata['template']);
                if ($templateContent !== false) {
                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
                }
            }

            // Full example pages.
            if (!empty($metadata['examples'])) {
                $examplesContent = [];
                foreach ((array) $metadata['examples'] as $example) {
                    $content = $this->getPageContent($example);
                    if ($content !== false) {
                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                    }
                }
                if (!empty($examplesContent)) {
                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
                }
            }

            // Text snippets (e.g. from ChromaDB); array_values() guarantees
            // numeric positions so the 1-based example id is always valid.
            if (!empty($metadata['snippets'])) {
                $snippetsContent = [];
                foreach (array_values((array) $metadata['snippets']) as $index => $snippet) {
                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
                }
                if (!empty($snippetsContent)) {
                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
                }
            }

            $contextInfo .= "\n</context>\n";

            // The context goes in front of the user prompt.
            $prompt = $contextInfo . "\n\n" . $prompt;
        }

        $data = [
            'model' => $this->model,
            'messages' => [
                ['role' => 'system', 'content' => $systemPrompt],
                ['role' => 'user', 'content' => $prompt],
            ],
            'max_tokens' => 6144,
            'stream' => false,
            'keep_alive' => '30m',
            // Follows the configured switch instead of being always on, so it
            // agrees with the /think and /no_think markers put into prompts.
            'think' => (bool) $this->think,
        ];

        // Tools are only offered when enabled in the plugin configuration.
        if ($this->getConf('use_tools', false)) {
            $data['tools'] = $this->getAvailableTools();
            $data['tool_choice'] = 'auto';
            $data['parallel_tool_calls'] = false;
        }

        // Sampling parameters are sent only when they have a value.
        foreach (['temperature', 'top_p', 'top_k', 'min_p'] as $parameter) {
            if ($this->$parameter !== null) {
                $data[$parameter] = $this->$parameter;
            }
        }

        return $this->callAPIWithTools($data, false);
    }

    /**
     * Handle tool calls from the LLM
     *
     * Processes tool calls made by the LLM and returns appropriate responses.
     * Implements caching to avoid duplicate calls with identical parameters.
     *
     * The returned array has the keys 'role' ('tool'), 'content',
     * 'tool_call_id' and 'cached' (true when served from the cache).
     *
     * @param array $toolCall The tool call data from the LLM
     * @return array The tool response message
     */
    private function handleToolCall($toolCall)
    {
        $toolName = $toolCall['function']['name'];

        // Arguments normally arrive as a JSON string; accept an already
        // decoded array too and fall back to no arguments on anything else.
        $rawArguments = $toolCall['function']['arguments'] ?? [];
        $arguments = is_array($rawArguments) ? $rawArguments : json_decode((string) $rawArguments, true);
        if (!is_array($arguments)) {
            $arguments = [];
        }

        $cacheKey = md5($toolName . serialize($arguments));

        if (isset($this->toolCallCache[$cacheKey])) {
            // Cached entries store role and content only; the id is per call.
            $toolResponse = $this->toolCallCache[$cacheKey];
            $toolResponse['tool_call_id'] = $toolCall['id'];
            $toolResponse['cached'] = true;
            return $toolResponse;
        }

        $toolResponse = [
            'role' => 'tool',
            'tool_call_id' => $toolCall['id'],
            'cached' => false,
        ];

        switch ($toolName) {
            case 'get_document':
                $documentId = isset($arguments['id']) ? (string) $arguments['id'] : '';
                if ($documentId === '') {
                    $toolResponse['content'] = 'Missing required argument: id';
                    break;
                }
                $content = $this->getPageContent($documentId);
                $toolResponse['content'] = ($content === false)
                    ? 'Document not found: ' . $documentId
                    : $content;
                break;

            case 'get_template':
                $toolResponse['content'] = $this->getTemplateContent();
                break;

            case 'get_examples':
                // Clamp to the 1-20 range advertised in the tool definition.
                $count = isset($arguments['count']) ? (int) $arguments['count'] : 5;
                $count = max(1, min(20, $count));
                // Double quotes so that \n is a real line break.
                $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
                break;

            default:
                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
        }

        $this->toolCallCache[$cacheKey] = [
            'role' => $toolResponse['role'],
            'content' => $toolResponse['content'],
        ];

        return $toolResponse;
    }

    /**
     * Make an API call with tool responses
     *
     * Sends a follow-up request to the LLM with tool responses.
     * Implements complex logic for handling tool calls with caching and loop protection.
Tool calls in a response are only followed while tools are still enabled.
     *
     * Complex logic includes:
     * 1. Making HTTP requests with proper authentication and error handling
     * 2. Processing tool calls from the LLM response
     * 3. Caching tool responses to avoid duplicate calls with identical parameters
     * 4. Tracking tool call counts to prevent infinite loops
     * 5. Implementing loop protection with call count limits
     * 6. Handling recursive tool calls until final content is generated
     *
     * Loop protection works by:
     * - Tracking individual tool call counts (max 3 per tool)
     * - Tracking total tool calls (max 10 total)
     * - Disabling tools when limits are exceeded to break potential loops
     *
     * @param array $data The API request data including messages with tool responses
     * @param bool $toolsCalled Whether the tool limits were reached; tools are then removed from the request
     * @param bool $useTools Whether to process tool calls (used for loop protection)
     * @return string The final response content
     * @throws \Exception If the request fails or the response has an unexpected format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        $headers = ['Content-Type: application/json'];
        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // Once the limits are hit, drop every tool-related request field.
        // 'parallel_tool_calls' goes too, since it is meaningless without 'tools'.
        if ($toolsCalled) {
            unset($data['tools'], $data['tool_choice'], $data['parallel_tool_calls']);
        }

        $payload = json_encode($data);
        if ($payload === false) {
            throw new \Exception('API request failed: ' . json_last_error_msg());
        }

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $this->api_url);
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        if ($error) {
            throw new \Exception('API request failed: ' . $error);
        }
        if ($httpCode !== 200) {
            throw new \Exception('API request failed with HTTP code: ' . $httpCode);
        }

        $result = json_decode($response, true);
        $message = is_array($result) ? ($result['choices'][0]['message'] ?? null) : null;
        if (!is_array($message)) {
            throw new \Exception('Unexpected API response format');
        }

        // Tool calls are looked at before the content: a response may carry
        // an empty-string content next to its tool calls, and returning that
        // content would end the conversation without running the tools.
        $toolCalls = $message['tool_calls'] ?? [];
        if ($useTools && !$toolsCalled && is_array($toolCalls) && !empty($toolCalls)) {
            $messages = $data['messages'];

            // Echo the assistant message back with all fields except 'content'.
            $assistantMessage = $message;
            unset($assistantMessage['content']);
            $messages[] = $assistantMessage;

            foreach ($toolCalls as $toolCall) {
                $toolName = $toolCall['function']['name'];
                $this->toolCallCounts[$toolName] = ($this->toolCallCounts[$toolName] ?? 0) + 1;

                $toolResponse = $this->handleToolCall($toolCall);
                // 'cached' is internal bookkeeping and is not sent to the API.
                unset($toolResponse['cached']);
                $messages[] = $toolResponse;
            }

            // More than 3 calls of one tool, or more than 10 calls overall,
            // switches tools off for the follow-up request.
            if (max($this->toolCallCounts) > 3 || array_sum($this->toolCallCounts) > 10) {
                $toolsCalled = true;
            }

            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
        }

        if (isset($message['content'])) {
            // Final answer: the counters start fresh for the next request.
            $this->toolCallCounts = [];
            return trim($message['content']);
        }

        throw new \Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Loads prompt templates from DokuWiki pages with IDs in the format
     * dokullm:prompts:LANGUAGE:PROMPT_NAME
     *
     * The method implements a language fallback mechanism:
     * 1. First tries to load the prompt in the configured language
     * 2. If not found, falls back to English prompts
     * 3. Throws an exception if neither is available
     *
     * After loading the prompt, it scans for placeholders and automatically
     * adds missing ones with appropriate values before replacing all placeholders.
     * Replacement is done in a single pass, so placeholder-like text inside
     * a substituted value (for example in the user's text) is left untouched.
     * Only scalar values are substituted.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array $variables Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws \Exception If the prompt page cannot be loaded in any language
     */
    private function loadPrompt($promptName, $variables = [])
    {
        $language = $this->getConf('language');
        if (empty($language) || $language === 'default') {
            $language = 'en';
        }

        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // English is the fallback for a missing language-specific page.
        if ($prompt === false && $language !== 'en') {
            $promptPageId = 'dokullm:prompts:en:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        if ($prompt === false) {
            throw new \Exception('Prompt page not found: ' . $promptPageId);
        }

        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            // A usable (scalar) caller-supplied value wins.
            if (isset($variables[$placeholder]) && is_scalar($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template'] ?? null);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->getSnippets(10);
                    break;

                case 'examples':
                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples'] ?? []);
                    break;

                case 'previous':
                    $previousPage = $variables['page_previous'] ?? '';
                    $variables[$placeholder] = $this->getPreviousContent($previousPage);

                    // Dates accompany the previous report.
                    $variables['current_date'] = $this->getPageDate();
                    $variables['previous_date'] = !empty($previousPage)
                        ? $this->getPageDate($previousPage)
                        : '';
                    break;

                default:
                    // Unknown placeholders are replaced with nothing.
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Placeholders are in the format {placeholder_name}.
        $replacements = [];
        foreach ($variables as $placeholder => $value) {
            if ($value === null || is_scalar($value)) {
                $replacements['{' . $placeholder . '}'] = (string) $value;
            }
        }

        return empty($replacements) ? $prompt : strtr($prompt, $replacements);
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the main system prompt and appends any command-specific system prompt
     * if available. A missing command-specific prompt is not an error.
     *
     * @param string $action The action/command name
     * @param array $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws \Exception If the main system prompt cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        $systemPrompt = $this->loadPrompt('system', $variables);

        if (empty($action)) {
            return $systemPrompt;
        }

        try {
            $systemPrompt .= "\n" . $this->loadPrompt($action . ':system', $variables);
        } catch (\Exception $e) {
            // The command-specific prompt is optional; the main system
            // prompt is used on its own when it is missing.
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Retrieves the raw content of a DokuWiki page by its ID.
     * Used for loading template and example page content for context.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if not found/readable
     */
    public function getPageContent($pageId)
    {
        // wikiFN() maps the page ID to its file path.
        $pageFile = wikiFN($pageId);

        if (!file_exists($pageFile) || !is_readable($pageFile)) {
            return false;
        }

        return file_get_contents($pageFile);
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Attempts to extract a date in YYmmdd format from the page ID.
     * If not found, uses the file's last modification timestamp.
Six-digit groups that are not a valid calendar date are ignored.
     *
     * Only digit runs of exactly six digits are considered, so a longer
     * number in the page ID is not cut into a bogus date. Two-digit years
     * 00-69 map to 20xx, 70-99 to 19xx.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD), or empty string if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        global $ID;

        $targetPageId = $pageId ?: $ID;
        if (empty($targetPageId)) {
            return '';
        }

        // Lookarounds keep the match from starting or ending inside a longer number.
        if (preg_match_all('/(?<!\d)(\d{2})(\d{2})(\d{2})(?!\d)/', $targetPageId, $candidates, PREG_SET_ORDER)) {
            foreach ($candidates as $parts) {
                $year = $parts[1];
                $month = $parts[2];
                $day = $parts[3];

                $fullYear = ((int) $year <= 69 ? '20' : '19') . $year;
                if (checkdate((int) $month, (int) $day, (int) $fullYear)) {
                    return $fullYear . '-' . $month . '-' . $day;
                }
            }
        }

        // Fallback: last modification time of the page file.
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            return date('Y-m-d', filemtime($pageFile));
        }

        return '';
    }

    /**
     * Get current text
     *
     * Returns the text stored by the most recent public processing call.
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided text
     * and returns an array of unique placeholder names.
Names are returned in order of first appearance; keys are not re-indexed.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names found in the text
     */
    public function findPlaceholders($text)
    {
        // One or more non-'}' characters between a pair of braces.
        if (!preg_match_all('/\{([^}]+)\}/', $text, $found)) {
            return [];
        }

        return array_unique($found[1]);
    }

    /**
     * Get template content for the current text
     *
     * Convenience function to retrieve template content. If a pageId is provided,
     * retrieves content directly from that page. If that page cannot be read, or
     * no pageId is given, queries ChromaDB for a relevant template based on the
     * current text and loads the first result.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content, or '( no template )' if none could be loaded
     */
    private function getTemplateContent($pageId = null)
    {
        if ($pageId !== null) {
            $direct = $this->getPageContent($pageId);
            if ($direct !== false) {
                return $direct;
            }
        }

        // Template suggestion for the text of the current request.
        $suggestions = $this->queryChromaDBTemplate($this->currentText);
        if (!empty($suggestions)) {
            $suggested = $this->getPageContent($suggestions[0]);
            if ($suggested !== false) {
                return $suggested;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Convenience function to retrieve relevant snippets for the current text.
     * Queries ChromaDB for relevant snippets and returns them formatted.
*
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets content, or the placeholder '( no examples )' if none were found
     */
    private function getSnippets($count = 10)
    {
        // Get example snippets for the current text
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        foreach ($snippets as $index => $snippet) {
            // "\n" must be double-quoted: in single quotes PHP keeps it as a literal
            // backslash followed by 'n' instead of emitting a line break
            $formattedSnippets[] = '<example id="' . ($index + 1) . '">' . "\n" . $snippet . "\n" . '</example>';
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Loads each page through getPageContent() and wraps its content in an
     * <example_page> element carrying the page ID in the source attribute.
     * Pages for which getPageContent() returns false are skipped.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples content, or the placeholder '( no examples )'
     *                when no IDs were given or none of the pages could be loaded
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content === false) {
                continue;
            }

            // Double-quoted "\n" so the wrapper really spans several lines
            $examplesContent[] = '<example_page source="' . $exampleId . '">' . "\n" . $content . "\n" . '</example_page>';
        }

        // Same placeholder as for an empty ID list, so the result is never a bare empty string
        if (empty($examplesContent)) {
            return '( no examples )';
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * Convenience function to retrieve content from a previous report page.
     * Returns the content of the previous page or a default message if not found.
*
     * @param string $previousId Previous page ID
     * @return string Previous report content or default message if not found
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);
        if ($content !== false) {
            return $content;
        }

        return '( previous report not found )';
    }

    /**
     * Get ChromaDB client with configuration
     *
     * Reads the ChromaDB and Ollama settings through getConf() and builds a
     * ChromaDBClient from them. The collection name is the first ':'-separated
     * segment of the current page ID; the configured default collection is used
     * when there is no page ID, the segment is empty, or it is 'playground'.
     *
     * @return array Array containing the ChromaDB client and collection name
     */
    private function getChromaDBClient()
    {
        // Get ChromaDB configuration from DokuWiki plugin configuration
        $chromaHost = $this->getConf('chroma_host', 'localhost');
        $chromaPort = $this->getConf('chroma_port', 8000);
        $chromaTenant = $this->getConf('chroma_tenant', 'dokullm');
        $chromaDatabase = $this->getConf('chroma_database', 'dokullm');
        $chromaDefaultCollection = $this->getConf('chroma_collection', 'documents');
        $ollamaHost = $this->getConf('ollama_host', 'localhost');
        $ollamaPort = $this->getConf('ollama_port', 11434);
        $ollamaModel = $this->getConf('ollama_embeddings_model', 'nomic-embed-text');

        // Use the first part of the current page ID as collection name, fallback to default
        global $ID;
        $chromaCollection = $chromaDefaultCollection;

        if (!empty($ID)) {
            $firstSegment = explode(':', $ID)[0];
            // 'playground' pages share the default collection
            if (!empty($firstSegment) && $firstSegment !== 'playground') {
                $chromaCollection = $firstSegment;
            }
        }

        // Create ChromaDB client with all required parameters
        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $chromaHost,
            $chromaPort,
            $chromaTenant,
            $chromaDatabase,
            $ollamaHost,
            $ollamaPort,
            $ollamaModel
        );

        return [$chromaClient, $chromaCollection];
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Passes the text to queryCollection() on the client returned by
     * getChromaDBClient() and returns the IDs of the first result set.
     * Any exception raised on the way is logged and an empty list is returned.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Use the ChromaDB IDs directly without conversion
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                return array_values($results['ids'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // The leading backslash matters: this file is namespaced, so a bare
            // "Exception" would name a class inside the plugin namespace and the
            // handler would never run.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Generates embeddings for the input text and queries ChromaDB for similar documents.
* Returns the actual text snippets instead of document IDs.
     * Any exception raised on the way is logged and an empty list is returned.
     *
     * @param string $text The text to find similar documents for
     * @param int $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            // Get ChromaDB client and collection name
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

            // Query for similar documents
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // Extract document texts from the first result set
            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                return array_values($results['documents'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // The leading backslash matters: this file is namespaced, so a bare
            // "Exception" would name a class inside the plugin namespace and the
            // handler would never run.
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Asks queryChromaDB() for a single document whose metadata matches the
     * filter 'type' => 'template', then strips a trailing chunk suffix
     * (e.g. "@2") from the returned ID to get the base document ID.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }

}