xref: /plugin/dokullm/LlmClient.php (revision 590368144294a28ecf0e0e39feb976bf79fefb1e)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4/**
5 * LLM Client for the dokullm plugin
6 *
7 * This class provides methods to interact with an LLM API for various
8 * text processing tasks such as completion, rewriting, grammar correction,
9 * summarization, conclusion creation, text analysis, and custom prompts.
10 *
11 * The client handles:
12 * - API configuration and authentication
13 * - Prompt template loading and processing
14 * - Context-aware requests with metadata
15 * - DokuWiki page content retrieval
16 */
17
// Abort immediately unless the file is loaded from within DokuWiki
defined('DOKU_INC') || die();
22
23
24/**
25 * LLM Client class for handling API communications
26 *
27 * Manages configuration settings and provides methods for various
28 * text processing operations through an LLM API.
29 * Implements caching for tool calls to avoid duplicate processing.
30 */
31class LlmClient
32{
33    /** @var string The API endpoint URL */
34    private $api_url;
35
36    /** @var array Cache for tool call results */
37    private $toolCallCache = [];
38
39    /** @var string Current text for tool usage */
40    private $currentText = '';
41
42    /** @var array Track tool call counts to prevent infinite loops */
43    private $toolCallCounts = [];
44
45    /** @var string The API authentication key */
46    private $api_key;
47
48    /** @var string The model identifier to use */
49    private $model;
50
51    /** @var int The request timeout in seconds */
52    private $timeout;
53
54    /** @var float The temperature setting for response randomness */
55    private $temperature;
56
57    /** @var float The top-p setting for nucleus sampling */
58    private $top_p;
59
60    /** @var int The top-k setting for token selection */
61    private $top_k;
62
63    /** @var float The min-p setting for minimum probability threshold */
64    private $min_p;
65
66    /** @var bool Whether to enable thinking in the LLM responses */
67    private $think;
68
69    /**
70     * Initialize the LLM client with configuration settings
71     *
72     * Retrieves configuration values from DokuWiki's configuration system
73     * for API URL, key, model, timeout, and LLM sampling parameters.
74     *
75     * Configuration values:
76     * - api_url: The LLM API endpoint URL
77     * - api_key: Authentication key for the API (optional)
78     * - model: The model identifier to use for requests
79     * - timeout: Request timeout in seconds
80     * - language: Language code for prompt templates
81     * - temperature: Temperature setting for response randomness (0.0-1.0)
82     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
83     * - top_k: Top-k setting (integer >= 1)
84     * - min_p: Minimum probability threshold (0.0-1.0)
85     * - think: Whether to enable thinking in LLM responses (boolean)
86     */
87    public function __construct()
88    {
89        global $conf;
90        $this->api_url = $conf['plugin']['dokullm']['api_url'];
91        $this->api_key = $conf['plugin']['dokullm']['api_key'];
92        $this->model = $conf['plugin']['dokullm']['model'];
93        $this->timeout = $conf['plugin']['dokullm']['timeout'];
94        $this->temperature = $conf['plugin']['dokullm']['temperature'];
95        $this->top_p = $conf['plugin']['dokullm']['top_p'];
96        $this->top_k = $conf['plugin']['dokullm']['top_k'];
97        $this->min_p = $conf['plugin']['dokullm']['min_p'];
98        $this->think = $conf['plugin']['dokullm']['think'] ?? false;
99    }
100
101
102
103    public function process($action, $text, $metadata = [], $useContext = true)
104    {
105        // Store the current text for tool usage
106        $this->currentText = $text;
107
108        // Add text, think and action to metadata
109        $metadata['text'] = $text;
110        $metadata['think'] = $this->think ? '/think' : '/no_think';
111        $metadata['action'] = $action;
112
113        // If we have 'template' in metadata, move it to 'page_template'
114        if (isset($metadata['template'])) {
115            $metadata['page_template'] = $metadata['template'];
116            unset($metadata['template']);
117        }
118
119        // If we have 'examples' in metadata, move it to 'page_examples'
120        if (isset($metadata['examples'])) {
121            $metadata['page_examples'] = $metadata['examples'];
122            unset($metadata['examples']);
123        }
124
125        // If we have 'previous' in metadata, move it to 'page_previous'
126        if (isset($metadata['previous'])) {
127            $metadata['page_previous'] = $metadata['previous'];
128            unset($metadata['previous']);
129        }
130
131        $prompt = $this->loadPrompt($action, $metadata);
132
133        return $this->callAPI($action, $prompt, $metadata, $useContext);
134    }
135
136
137
138    /**
139     * Create the provided text using the LLM
140     *
141     * Sends a prompt to the LLM asking it to create the given text.
142     * First queries ChromaDB for relevant documents to include as examples.
143     * If no template is defined, queries ChromaDB for a template.
144     *
145     * @param string $text The text to create
146     * @param array $metadata Optional metadata containing template, examples, and snippets
147     * @param bool $useContext Whether to include template and examples in the context (default: true)
148     * @return string The created text
149     */
150    public function createReport($text, $metadata = [], $useContext = true)
151    {
152        // Store the current text for tool usage
153        $this->currentText = $text;
154
155        // Check if tools should be used based on configuration
156        global $conf;
157        $useTools = $conf['plugin']['dokullm']['use_tools'] ?? false;
158
159        // Only try to find template and add snippets if tools are not enabled
160        // When tools are enabled, the LLM will call get_template and get_examples as needed
161        if (!$useTools) {
162            // If no template is defined, try to find one using ChromaDB
163            if (empty($metadata['template'])) {
164                $templateResult = $this->queryChromaDBTemplate($text);
165                if (!empty($templateResult)) {
166                    // Use the first result as template
167                    $metadata['template'] = $templateResult[0];
168                }
169            }
170
171            // Query ChromaDB for relevant documents to use as examples
172            $chromaResults = $this->queryChromaDBSnippets($text, 10);
173
174            // Add ChromaDB results to metadata as snippets
175            if (!empty($chromaResults)) {
176                // Merge with existing snippets
177                $metadata['snippets'] = array_merge(
178                    isset($metadata['snippets']) ? $metadata['snippets'] : [],
179                    $chromaResults
180                );
181            }
182        }
183
184        $think = $this->think ? '/think' : '/no_think';
185        $prompt = $this->loadPrompt('create', ['text' => $text, 'think' => $think]);
186
187        return $this->callAPI('create', $prompt, $metadata, $useContext);
188    }
189
190    /**
191     * Compare two texts and highlight differences
192     *
193     * Sends a prompt to the LLM asking it to compare two texts and
194     * highlight their similarities and differences.
195     *
196     * @param string $text The current text to compare
197     * @param array $metadata Optional metadata containing template, examples, and previous report reference
198     * @return string The comparison results
199     */
200    public function compareText($text, $metadata = [], $useContext = false)
201    {
202        // Store the current text for tool usage
203        $this->currentText = $text;
204
205        // Load previous report from metadata if specified
206        $previousText = '';
207        if (!empty($metadata['previous_report_page'])) {
208            $previousText = $this->getPageContent($metadata['previous_report_page']);
209            if ($previousText === false) {
210                $previousText = '';
211            }
212        }
213
214        // Extract dates for placeholders
215        $currentDate = $this->getPageDate();
216        $previousDate = !empty($metadata['previous_report_page']) ?
217                        $this->getPageDate($metadata['previous_report_page']) :
218                        '';
219
220        $think = $this->think ? '/think' : '/no_think';
221        $prompt = $this->loadPrompt('compare', [
222            'text' => $text,
223            'previous_text' => $previousText,
224            'current_date' => $currentDate,
225            'previous_date' => $previousDate,
226            'think' => $think
227        ]);
228
229        return $this->callAPI('compare', $prompt, $metadata, $useContext);
230    }
231
232    /**
233     * Process text with a custom user prompt
234     *
235     * Sends a custom prompt to the LLM along with the provided text.
236     *
237     * @param string $text The text to process
238     * @param string $customPrompt The custom prompt to use
239     * @param array $metadata Optional metadata containing template and examples
240     * @param bool $useContext Whether to include template and examples in the context (default: true)
241     * @return string The processed text
242     */
243    public function processCustomPrompt($text, $metadata = [], $useContext = true)
244    {
245        // Store the current text for tool usage
246        $this->currentText = $text;
247
248        // Format the prompt with the text and custom prompt
249        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
250
251        return $this->callAPI('custom', $prompt, $metadata, $useContext);
252    }
253
254    /**
255     * Get the list of available tools for the LLM
256     *
257     * Defines the tools that can be used by the LLM during processing.
258     *
259     * @return array List of tool definitions
260     */
261    private function getAvailableTools()
262    {
263        return [
264            [
265                'type' => 'function',
266                'function' => [
267                    'name' => 'get_document',
268                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
269                    'parameters' => [
270                        'type' => 'object',
271                        'properties' => [
272                            'id' => [
273                                'type' => 'string',
274                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
275                            ]
276                        ],
277                        'required' => ['id']
278                    ]
279                ]
280            ],
281            [
282                'type' => 'function',
283                'function' => [
284                    'name' => 'get_template',
285                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
286                    'parameters' => [
287                        'type' => 'object',
288                        'properties' => [
289                            'language' => [
290                                'type' => 'string',
291                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
292                                'default' => 'ro'
293                            ]
294                        ]
295                    ]
296                ]
297            ],
298            [
299                'type' => 'function',
300                'function' => [
301                    'name' => 'get_examples',
302                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
303                    'parameters' => [
304                        'type' => 'object',
305                        'properties' => [
306                            'count' => [
307                                'type' => 'integer',
308                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
309                                'default' => 5
310                            ]
311                        ]
312                    ]
313                ]
314            ]
315        ];
316    }
317
318    /**
319     * Call the LLM API with the specified prompt
320     *
321     * Makes an HTTP POST request to the configured API endpoint with
322     * the prompt and other parameters. Handles authentication if an
323     * API key is configured.
324     *
325     * The method constructs a conversation with system and user messages,
326     * including context information from metadata when available.
327     *
328     * Complex logic includes:
329     * 1. Loading and enhancing the system prompt with metadata context
330     * 2. Building the API request with model parameters
331     * 3. Handling authentication with API key if configured
332     * 4. Making the HTTP request with proper error handling
333     * 5. Parsing and validating the API response
334     * 6. Supporting tool usage with automatic tool calling when enabled
335     * 7. Implementing context enhancement with templates, examples, and snippets
336     *
337     * The context information includes:
338     * - Template content: Used as a starting point for the response
339     * - Example pages: Full content of specified example pages
340     * - Text snippets: Relevant text examples from ChromaDB
341     *
342     * When tools are enabled, the method supports automatic tool calling:
343     * - Tools can retrieve documents, templates, and examples as needed
344     * - Tool responses are cached to avoid duplicate calls with identical parameters
345     * - Infinite loop protection prevents excessive tool calls
346     *
347     * @param string $command The command name for loading command-specific system prompts
348     * @param string $prompt The prompt to send to the LLM as user message
349     * @param array $metadata Optional metadata containing template, examples, and snippets
350     * @param bool $useContext Whether to include template and examples in the context (default: true)
351     * @return string The response content from the LLM
352     * @throws Exception If the API request fails or returns unexpected format
353     */
354
355    private function callAPI($command, $prompt, $metadata = [], $useContext = true)
356    {
357        // Load system prompt which provides general instructions to the LLM
358        $systemPrompt = $this->loadSystemPrompt($command, []);
359
360        // Enhance the prompt with context information from metadata
361        // This provides the LLM with additional context about templates and examples
362        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
363            $contextInfo = "\n\n<context>\n";
364
365            // Add template content if specified in metadata
366            if (!empty($metadata['template'])) {
367                $templateContent = $this->getPageContent($metadata['template']);
368                if ($templateContent !== false) {
369                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
370                }
371            }
372
373            // Add example pages content if specified in metadata
374            if (!empty($metadata['examples'])) {
375                $examplesContent = [];
376                foreach ($metadata['examples'] as $example) {
377                    $content = $this->getPageContent($example);
378                    if ($content !== false) {
379                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
380                    }
381                }
382                if (!empty($examplesContent)) {
383                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
384                }
385            }
386
387            // Add text snippets if specified in metadata
388            if (!empty($metadata['snippets'])) {
389                $snippetsContent = [];
390                foreach ($metadata['snippets'] as $index => $snippet) {
391                    // These are text snippets from ChromaDB
392                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
393                }
394                if (!empty($snippetsContent)) {
395                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
396                }
397            }
398
399            $contextInfo .= "\n</context>\n";
400
401            // Append context information to system prompt
402            $prompt = $contextInfo . "\n\n" . $prompt;
403        }
404
405        // Check if tools should be used based on configuration
406        global $conf;
407        $useTools = $conf['plugin']['dokullm']['use_tools'] ?? false;
408
409        // Prepare API request data with model parameters
410        $data = [
411            'model' => $this->model,
412            'messages' => [
413                ['role' => 'system', 'content' => $systemPrompt],
414                ['role' => 'user', 'content' => $prompt]
415            ],
416            'max_tokens' => 6144,
417            'stream' => false,
418            'keep_alive' => '30m',
419            'think' => true
420        ];
421
422        // Add tools to the request only if useTools is true
423        if ($useTools) {
424            // Define available tools
425            $data['tools'] = $this->getAvailableTools();
426            $data['tool_choice'] = 'auto';
427            $data['parallel_tool_calls'] = false;
428        }
429
430        // Only add parameters if they are defined and not null
431        if ($this->temperature !== null) {
432            $data['temperature'] = $this->temperature;
433        }
434        if ($this->top_p !== null) {
435            $data['top_p'] = $this->top_p;
436        }
437        if ($this->top_k !== null) {
438            $data['top_k'] = $this->top_k;
439        }
440        if ($this->min_p !== null) {
441            $data['min_p'] = $this->min_p;
442        }
443
444        // Make an API call with tool responses
445        return $this->callAPIWithTools($data, false);
446    }
447
448    /**
449     * Handle tool calls from the LLM
450     *
451     * Processes tool calls made by the LLM and returns appropriate responses.
452     * Implements caching to avoid duplicate calls with identical parameters.
453     *
454     * @param array $toolCall The tool call data from the LLM
455     * @return array The tool response message
456     */
457    private function handleToolCall($toolCall)
458    {
459        $toolName = $toolCall['function']['name'];
460        $arguments = json_decode($toolCall['function']['arguments'], true);
461
462        // Create a cache key from the tool name and arguments
463        $cacheKey = md5($toolName . serialize($arguments));
464
465        // Check if we have a cached result for this tool call
466        if (isset($this->toolCallCache[$cacheKey])) {
467            // Return cached result and indicate it was found in cache
468            $toolResponse = $this->toolCallCache[$cacheKey];
469            // Update with current tool call ID
470            $toolResponse['tool_call_id'] = $toolCall['id'];
471            $toolResponse['cached'] = true; // Indicate this response was cached
472            return $toolResponse;
473        }
474
475        $toolResponse = [
476            'role' => 'tool',
477            'tool_call_id' => $toolCall['id'],
478            'cached' => false // Indicate this is a fresh response
479        ];
480
481        switch ($toolName) {
482            case 'get_document':
483                $documentId = $arguments['id'];
484                $content = $this->getPageContent($documentId);
485                if ($content === false) {
486                    $toolResponse['content'] = 'Document not found: ' . $documentId;
487                } else {
488                    $toolResponse['content'] = $content;
489                }
490                break;
491
492            case 'get_template':
493                // Get template content using the convenience function
494                $toolResponse['content'] = $this->getTemplateContent();
495                break;
496
497            case 'get_examples':
498                // Get examples content using the convenience function
499                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
500                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
501                break;
502
503            default:
504                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
505        }
506
507        // Cache the result for future calls with the same parameters
508        $cacheEntry = $toolResponse;
509        // Remove tool_call_id and cached flag from cache as they change per call
510        unset($cacheEntry['tool_call_id']);
511        unset($cacheEntry['cached']);
512        $this->toolCallCache[$cacheKey] = $cacheEntry;
513
514        return $toolResponse;
515    }
516
517    /**
518     * Make an API call with tool responses
519     *
520     * Sends a follow-up request to the LLM with tool responses.
521     * Implements complex logic for handling tool calls with caching and loop protection.
522     *
523     * Complex logic includes:
524     * 1. Making HTTP requests with proper authentication and error handling
525     * 2. Processing tool calls from the LLM response
526     * 3. Caching tool responses to avoid duplicate calls with identical parameters
527     * 4. Tracking tool call counts to prevent infinite loops
528     * 5. Implementing loop protection with call count limits
529     * 6. Handling recursive tool calls until final content is generated
530     *
531     * Loop protection works by:
532     * - Tracking individual tool call counts (max 3 per tool)
533     * - Tracking total tool calls (max 10 total)
534     * - Disabling tools when limits are exceeded to break potential loops
535     *
536     * @param array $data The API request data including messages with tool responses
537     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
538     * @param bool $useTools Whether to process tool calls (used for loop protection)
539     * @return string The final response content
540     */
541    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
542    {
543        // Set up HTTP headers, including authentication if API key is configured
544        $headers = [
545            'Content-Type: application/json'
546        ];
547
548        if (!empty($this->api_key)) {
549            $headers[] = 'Authorization: Bearer ' . $this->api_key;
550        }
551
552       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
553        if ($toolsCalled) {
554            unset($data['tools']);
555            unset($data['tool_choice']);
556        }
557
558        // Initialize and configure cURL for the API request
559        $ch = curl_init();
560        curl_setopt($ch, CURLOPT_URL, $this->api_url);
561        curl_setopt($ch, CURLOPT_POST, true);
562        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
563        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
564        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
565        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
566        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
567
568        // Execute the API request
569        $response = curl_exec($ch);
570        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
571        $error = curl_error($ch);
572        curl_close($ch);
573
574        // Handle cURL errors
575        if ($error) {
576            throw new Exception('API request failed: ' . $error);
577        }
578
579        // Handle HTTP errors
580        if ($httpCode !== 200) {
581            throw new Exception('API request failed with HTTP code: ' . $httpCode);
582        }
583
584        // Parse and validate the JSON response
585        $result = json_decode($response, true);
586
587        // Extract the content from the response if available
588        if (isset($result['choices'][0]['message']['content'])) {
589            $content = trim($result['choices'][0]['message']['content']);
590            // Reset tool call counts when we get final content
591            $this->toolCallCounts = [];
592            return $content;
593        }
594
595        // Handle tool calls if present
596        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
597            $toolCalls = $result['choices'][0]['message']['tool_calls'];
598            // Start with original messages
599            $messages = $data['messages'];
600            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
601            $assistantMessage = [];
602            foreach ($result['choices'][0]['message'] as $key => $value) {
603                if ($key !== 'content') {
604                    $assistantMessage[$key] = $value;
605                }
606            }
607            // Add assistant's message with tool calls
608            $messages[] = $assistantMessage;
609
610            // Process each tool call and track counts to prevent infinite loops
611            foreach ($toolCalls as $toolCall) {
612                $toolName = $toolCall['function']['name'];
613                // Increment tool call count
614                if (!isset($this->toolCallCounts[$toolName])) {
615                    $this->toolCallCounts[$toolName] = 0;
616                }
617                $this->toolCallCounts[$toolName]++;
618
619                $toolResponse = $this->handleToolCall($toolCall);
620                $messages[] = $toolResponse;
621            }
622
623            // Check if any tool has been called more than 3 times
624            $toolsCalledCount = 0;
625            foreach ($this->toolCallCounts as $count) {
626                if ($count > 3) {
627                    // If any tool called more than 3 times, disable tools to break loop
628                    $toolsCalled = true;
629                    break;
630                }
631                $toolsCalledCount += $count;
632            }
633
634            // If total tool calls exceed 10, also disable tools
635            if ($toolsCalledCount > 10) {
636                $toolsCalled = true;
637            }
638
639            // Make another API call with tool responses
640            $data['messages'] = $messages;
641            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
642        }
643
644        // Throw exception for unexpected response format
645        throw new Exception('Unexpected API response format');
646    }
647
648    /**
649     * Load a prompt template from a DokuWiki page and replace placeholders
650     *
651     * Loads prompt templates from DokuWiki pages with IDs in the format
652     * dokullm:prompts:LANGUAGE:PROMPT_NAME
653     *
654     * The method implements a language fallback mechanism:
655     * 1. First tries to load the prompt in the configured language
656     * 2. If not found, falls back to English prompts
657     * 3. Throws an exception if neither is available
658     *
659     * After loading the prompt, it scans for placeholders and automatically
660     * adds missing ones with appropriate values before replacing all placeholders.
661     *
662     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
663     * @param array $variables Associative array of placeholder => value pairs
664     * @return string The processed prompt with placeholders replaced
665     * @throws Exception If the prompt page cannot be loaded in any language
666     */
667    private function loadPrompt($promptName, $variables = [])
668    {
669        global $conf;
670        $language = $conf['plugin']['dokullm']['language'];
671
672        // Default to 'en' if language is 'default' or not set
673        if ($language === 'default' || empty($language)) {
674            $language = 'en';
675        }
676
677        // Construct the page ID for the prompt in the configured language
678        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
679
680        // Try to get the content of the prompt page in the configured language
681        $prompt = $this->getPageContent($promptPageId);
682
683        // If the language-specific prompt doesn't exist, try English as fallback
684        if ($prompt === false && $language !== 'en') {
685            $promptPageId = 'dokullm:prompts:en:' . $promptName;
686            $prompt = $this->getPageContent($promptPageId);
687        }
688
689        // If still no prompt found, throw an exception
690        if ($prompt === false) {
691            throw new Exception('Prompt page not found: ' . $promptPageId);
692        }
693
694        // Find placeholders in the prompt
695        $placeholders = $this->findPlaceholders($prompt);
696
697        // Add missing placeholders with appropriate values
698        foreach ($placeholders as $placeholder) {
699            // Skip if already provided in variables
700            if (isset($variables[$placeholder])) {
701                continue;
702            }
703
704            // Add appropriate values for specific placeholders
705            switch ($placeholder) {
706                case 'template':
707                    // If we have a page_template in variables, use it
708                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
709                    break;
710
711                case 'snippets':
712                    $variables[$placeholder] = $this->getSnippets(10);
713                    break;
714
715                case 'examples':
716                    // If we have example page IDs in metadata, add examples content
717                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
718                    break;
719
720                case 'previous':
721                    // If we have a previous report page ID in metadata, add previous content
722                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
723
724                    // Add current and previous dates to metadata
725                    $variables['current_date'] = $this->getPageDate();
726                    $variables['previous_date'] = !empty($variables['page_previous']) ?
727                                                $this->getPageDate($variables['page_previous']) :
728                                                '';
729                    break;
730
731                default:
732                    // For other placeholders, leave them empty or set a default value
733                    $variables[$placeholder] = '';
734                    break;
735            }
736        }
737
738        // Replace placeholders with actual values
739        // Placeholders are in the format {placeholder_name}
740        foreach ($variables as $placeholder => $value) {
741            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
742        }
743
744        // Return the processed prompt
745        return $prompt;
746    }
747
748    /**
749     * Load system prompt with optional command-specific appendage
750     *
751     * Loads the main system prompt and appends any command-specific system prompt
752     * if available.
753     *
754     * @param string $action The action/command name
755     * @param array $variables Associative array of placeholder => value pairs
756     * @return string The combined system prompt
757     */
758    private function loadSystemPrompt($action, $variables = [])
759    {
760        // Load system prompt which provides general instructions to the LLM
761        $systemPrompt = $this->loadPrompt('system', $variables);
762
763        // Check if there's a command-specific system prompt appendage
764        if (!empty($action)) {
765            try {
766                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
767                if ($commandSystemPrompt !== false) {
768                    $systemPrompt .= "\n" . $commandSystemPrompt;
769                }
770            } catch (Exception $e) {
771                // Ignore exceptions when loading command-specific system prompt
772                // This allows the main system prompt to still be used
773            }
774        }
775
776        return $systemPrompt;
777    }
778
779    /**
780     * Get the content of a DokuWiki page
781     *
782     * Retrieves the raw content of a DokuWiki page by its ID.
783     * Used for loading template and example page content for context.
784     *
785     * @param string $pageId The page ID to retrieve
786     * @return string|false The page content or false if not found/readable
787     */
788    public function getPageContent($pageId)
789    {
790        // Convert page ID to file path
791        $pageFile = wikiFN($pageId);
792
793        // Check if file exists and is readable
794        if (file_exists($pageFile) && is_readable($pageFile)) {
795            return file_get_contents($pageFile);
796        }
797
798        return false;
799    }
800
801    /**
802     * Extract date from page ID or file timestamp
803     *
804     * Attempts to extract a date in YYmmdd format from the page ID.
805     * If not found, uses the file's last modification timestamp.
806     *
807     * @param string $pageId Optional page ID to extract date from (defaults to current page)
808     * @return string Formatted date string (YYYY-MM-DD)
809     */
810    private function getPageDate($pageId = null)
811    {
812        global $ID;
813
814        // Use provided page ID or current page ID
815        $targetPageId = $pageId ?: $ID;
816
817        // Try to extract date from page ID (looking for YYmmdd pattern)
818        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
819            // Convert YYmmdd to YYYY-MM-DD
820            $year = $matches[1];
821            $month = $matches[2];
822            $day = $matches[3];
823
824            // Assume 20xx for years 00-69, 19xx for years 70-99
825            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
826
827            return $fullYear . '-' . $month . '-' . $day;
828        }
829
830        // Fallback to file timestamp
831        $pageFile = wikiFN($targetPageId);
832        if (file_exists($pageFile)) {
833            $timestamp = filemtime($pageFile);
834            return date('Y-m-d', $timestamp);
835        }
836
837        // Return empty string if no date can be determined
838        return '';
839    }
840
841    /**
842     * Get current text
843     *
844     * Retrieves the current text stored from the process function.
845     *
846     * @return string The current text
847     */
848    private function getCurrentText()
849    {
850        return $this->currentText;
851    }
852
853    /**
854     * Scan text for placeholders
855     *
856     * Finds all placeholders in the format {placeholder_name} in the provided text
857     * and returns an array of unique placeholder names.
858     *
859     * @param string $text The text to scan for placeholders
860     * @return array List of unique placeholder names found in the text
861     */
862    public function findPlaceholders($text)
863    {
864        $placeholders = [];
865        $pattern = '/\{([^}]+)\}/';
866
867        if (preg_match_all($pattern, $text, $matches)) {
868            // Get unique placeholder names
869            $placeholders = array_unique($matches[1]);
870        }
871
872        return $placeholders;
873    }
874
875    /**
876     * Get template content for the current text
877     *
878     * Convenience function to retrieve template content. If a pageId is provided,
879     * retrieves content directly from that page. Otherwise, queries ChromaDB for
880     * a relevant template based on the current text.
881     *
882     * @param string|null $pageId Optional page ID to retrieve template from directly
883     * @return string The template content or empty string if not found
884     */
885    private function getTemplateContent($pageId = null)
886    {
887        // If pageId is provided, use it directly
888        if ($pageId !== null) {
889            $templateContent = $this->getPageContent($pageId);
890            if ($templateContent !== false) {
891                return $templateContent;
892            }
893        }
894
895        // Otherwise, get template suggestion for the current text
896        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
897        if (!empty($pageId)) {
898            $templateContent = $this->getPageContent($pageId[0]);
899            if ($templateContent !== false) {
900                return $templateContent;
901            }
902        }
903        return '( no template )';
904    }
905
906    /**
907     * Get snippets content for the current text
908     *
909     * Convenience function to retrieve relevant snippets for the current text.
910     * Queries ChromaDB for relevant snippets and returns them formatted.
911     *
912     * @param int $count Number of snippets to retrieve (default: 10)
913     * @return string Formatted snippets content or empty string if not found
914     */
915    private function getSnippets($count = 10)
916    {
917        // Get example snippets for the current text
918        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
919        if (!empty($snippets)) {
920            $formattedSnippets = [];
921            foreach ($snippets as $index => $snippet) {
922                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
923            }
924            return implode("\n", $formattedSnippets);
925        }
926        return '( no examples )';
927    }
928
929    /**
930     * Get examples content from example page IDs
931     *
932     * Convenience function to retrieve content from example pages.
933     * Returns the content of each page packed in XML elements.
934     *
935     * @param array $exampleIds List of example page IDs
936     * @return string Formatted examples content or empty string if not found
937     */
938    private function getExamplesContent($exampleIds = [])
939    {
940        if (empty($exampleIds) || !is_array($exampleIds)) {
941            return '( no examples )';
942        }
943
944        $examplesContent = [];
945        foreach ($exampleIds as $index => $exampleId) {
946            $content = $this->getPageContent($exampleId);
947            if ($content !== false) {
948                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
949            }
950        }
951
952        return implode("\n", $examplesContent);
953    }
954
955    /**
956     * Get previous report content from previous page ID
957     *
958     * Convenience function to retrieve content from a previous report page.
959     * Returns the content of the previous page or a default message if not found.
960     *
961     * @param string $previousId Previous page ID
962     * @return string Previous report content or default message if not found
963     */
964    private function getPreviousContent($previousId = '')
965    {
966        if (empty($previousId)) {
967            return '( no previous report )';
968        }
969
970        $content = $this->getPageContent($previousId);
971        if ($content !== false) {
972            return $content;
973        }
974
975        return '( previous report not found )';
976    }
977
978    /**
979     * Get ChromaDB client with configuration
980     *
981     * Creates and returns a ChromaDB client with the appropriate configuration.
982     * Extracts modality from the current page ID to use as the collection name.
983     *
984     * @return array Array containing the ChromaDB client and collection name
985     */
986    private function getChromaDBClient()
987    {
988        // Include config.php to get ChromaDB configuration
989        require_once 'config.php';
990
991        // Get ChromaDB configuration from config.php
992        $chromaHost = defined('CHROMA_HOST') ? CHROMA_HOST : 'localhost';
993        $chromaPort = defined('CHROMA_PORT') ? CHROMA_PORT : 8000;
994        $chromaTenant = defined('CHROMA_TENANT') ? CHROMA_TENANT : 'dokullm';
995        $chromaDatabase = defined('CHROMA_DATABASE') ? CHROMA_DATABASE : 'dokullm';
996        $chromaDefaultCollection = defined('CHROMA_COLLECTION') ? CHROMA_COLLECTION : 'documents';
997
998        // Use the first part of the current page ID as collection name, fallback to default
999        global $ID;
1000        $chromaCollection = $chromaDefaultCollection; // Default collection name
1001
1002        if (!empty($ID)) {
1003            // Split the page ID by ':' and take the first part as collection name
1004            $parts = explode(':', $ID);
1005            if (isset($parts[0]) && !empty($parts[0])) {
1006                // If the first part is 'playground', use the default collection
1007                // Otherwise, use the first part as the collection name
1008                if ($parts[0] === 'playground') {
1009                    $chromaCollection = $chromaDefaultCollection;
1010                } else {
1011                    $chromaCollection = $parts[0];
1012                }
1013            }
1014        }
1015
1016        // Create ChromaDB client
1017        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient($chromaHost, $chromaPort, $chromaTenant, $chromaDatabase);
1018
1019
1020        return [$chromaClient, $chromaCollection];
1021    }
1022
1023    /**
1024     * Query ChromaDB for relevant documents
1025     *
1026     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1027     * Extracts modality from the current page ID to use as the collection name.
1028     *
1029     * @param string $text The text to find similar documents for
1030     * @param int $limit Maximum number of documents to retrieve (default: 5)
1031     * @param array|null $where Optional filter conditions for metadata
1032     * @return array List of document IDs
1033     */
1034    private function queryChromaDB($text, $limit = 5, $where = null)
1035    {
1036        try {
1037            // Get ChromaDB client and collection name
1038            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1039            // Query for similar documents
1040            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1041
1042            // Extract document IDs from results
1043            $documentIds = [];
1044            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
1045                foreach ($results['ids'][0] as $id) {
1046                    // Use the ChromaDB ID directly without conversion
1047                    $documentIds[] = $id;
1048                }
1049            }
1050
1051            return $documentIds;
1052        } catch (Exception $e) {
1053            // Log error but don't fail the operation
1054            error_log('ChromaDB query failed: ' . $e->getMessage());
1055            return [];
1056        }
1057    }
1058
1059    /**
1060     * Query ChromaDB for relevant documents and return text snippets
1061     *
1062     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1063     * Returns the actual text snippets instead of document IDs.
1064     *
1065     * @param string $text The text to find similar documents for
1066     * @param int $limit Maximum number of documents to retrieve (default: 10)
1067     * @param array|null $where Optional filter conditions for metadata
1068     * @return array List of text snippets
1069     */
1070    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
1071    {
1072        try {
1073            // Get ChromaDB client and collection name
1074            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1075            // Query for similar documents
1076            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1077
1078            // Extract document texts from results
1079            $snippets = [];
1080            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
1081                foreach ($results['documents'][0] as $document) {
1082                    $snippets[] = $document;
1083                }
1084            }
1085
1086            return $snippets;
1087        } catch (Exception $e) {
1088            // Log error but don't fail the operation
1089            error_log('ChromaDB query failed: ' . $e->getMessage());
1090            return [];
1091        }
1092    }
1093
1094    /**
1095     * Query ChromaDB for a template document
1096     *
1097     * Generates embeddings for the input text and queries ChromaDB for a template document
1098     * by filtering with metadata 'template=true'.
1099     *
1100     * @param string $text The text to find a template for
1101     * @return array List of template document IDs (maximum 1)
1102     */
1103    public function queryChromaDBTemplate($text)
1104    {
1105        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1106
1107        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1108        if (!empty($templateIds)) {
1109            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1110        }
1111
1112        return $templateIds;
1113    }
1114
1115}
1116