xref: /plugin/dokullm/LlmClient.php (revision 9b704e62a99e945ec7e78c480face208b8601522)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4/**
5 * LLM Client for the dokullm plugin
6 *
7 * This class provides methods to interact with an LLM API for various
8 * text processing tasks such as completion, rewriting, grammar correction,
9 * summarization, conclusion creation, text analysis, and custom prompts.
10 *
11 * The client handles:
12 * - API configuration and authentication
13 * - Prompt template loading and processing
14 * - Context-aware requests with metadata
15 * - DokuWiki page content retrieval
16 */
17
// Refuse to run when the file is requested directly instead of being loaded by DokuWiki
if (!defined('DOKU_INC')) die();
22
/**
 * Look up a dokullm plugin setting in DokuWiki's global configuration
 *
 * Reads $conf['plugin']['dokullm'][$key] and falls back to the supplied
 * default when that entry is not set (or is null).
 *
 * @param string $key Name of the plugin setting
 * @param mixed $default Value returned when the setting is missing
 * @return mixed The configured value, or $default
 */
function getConf($key, $default = null) {
    global $conf;

    if (!isset($conf['plugin']['dokullm'][$key])) {
        return $default;
    }

    return $conf['plugin']['dokullm'][$key];
}
34
35
36/**
37 * LLM Client class for handling API communications
38 *
39 * Manages configuration settings and provides methods for various
40 * text processing operations through an LLM API.
41 * Implements caching for tool calls to avoid duplicate processing.
42 */
43class LlmClient
44{
45    /** @var string The API endpoint URL */
46    private $api_url;
47
48    /** @var array Cache for tool call results */
49    private $toolCallCache = [];
50
51    /** @var string Current text for tool usage */
52    private $currentText = '';
53
54    /** @var array Track tool call counts to prevent infinite loops */
55    private $toolCallCounts = [];
56
57    /** @var string The API authentication key */
58    private $api_key;
59
60    /** @var string The model identifier to use */
61    private $model;
62
63    /** @var int The request timeout in seconds */
64    private $timeout;
65
66    /** @var float The temperature setting for response randomness */
67    private $temperature;
68
69    /** @var float The top-p setting for nucleus sampling */
70    private $top_p;
71
72    /** @var int The top-k setting for token selection */
73    private $top_k;
74
75    /** @var float The min-p setting for minimum probability threshold */
76    private $min_p;
77
78    /** @var bool Whether to enable thinking in the LLM responses */
79    private $think;
80
81    /**
82     * Initialize the LLM client with configuration settings
83     *
84     * Retrieves configuration values from DokuWiki's configuration system
85     * for API URL, key, model, timeout, and LLM sampling parameters.
86     *
87     * Configuration values:
88     * - api_url: The LLM API endpoint URL
89     * - api_key: Authentication key for the API (optional)
90     * - model: The model identifier to use for requests
91     * - timeout: Request timeout in seconds
92     * - language: Language code for prompt templates
93     * - temperature: Temperature setting for response randomness (0.0-1.0)
94     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
95     * - top_k: Top-k setting (integer >= 1)
96     * - min_p: Minimum probability threshold (0.0-1.0)
97     * - think: Whether to enable thinking in LLM responses (boolean)
98     */
99    public function __construct()
100    {
101        $this->api_url = $this->getConf('api_url');
102        $this->api_key = $this->getConf('api_key');
103        $this->model = $this->getConf('model');
104        $this->timeout = $this->getConf('timeout');
105        $this->temperature = $this->getConf('temperature');
106        $this->top_p = $this->getConf('top_p');
107        $this->top_k = $this->getConf('top_k');
108        $this->min_p = $this->getConf('min_p');
109        $this->think = $this->getConf('think', false);
110    }
111
112
113
114    public function process($action, $text, $metadata = [], $useContext = true)
115    {
116        // Store the current text for tool usage
117        $this->currentText = $text;
118
119        // Add text, think and action to metadata
120        $metadata['text'] = $text;
121        $metadata['think'] = $this->think ? '/think' : '/no_think';
122        $metadata['action'] = $action;
123
124        // If we have 'template' in metadata, move it to 'page_template'
125        if (isset($metadata['template'])) {
126            $metadata['page_template'] = $metadata['template'];
127            unset($metadata['template']);
128        }
129
130        // If we have 'examples' in metadata, move it to 'page_examples'
131        if (isset($metadata['examples'])) {
132            $metadata['page_examples'] = $metadata['examples'];
133            unset($metadata['examples']);
134        }
135
136        // If we have 'previous' in metadata, move it to 'page_previous'
137        if (isset($metadata['previous'])) {
138            $metadata['page_previous'] = $metadata['previous'];
139            unset($metadata['previous']);
140        }
141
142        $prompt = $this->loadPrompt($action, $metadata);
143
144        return $this->callAPI($action, $prompt, $metadata, $useContext);
145    }
146
147
148
149    /**
150     * Create the provided text using the LLM
151     *
152     * Sends a prompt to the LLM asking it to create the given text.
153     * First queries ChromaDB for relevant documents to include as examples.
154     * If no template is defined, queries ChromaDB for a template.
155     *
156     * @param string $text The text to create
157     * @param array $metadata Optional metadata containing template, examples, and snippets
158     * @param bool $useContext Whether to include template and examples in the context (default: true)
159     * @return string The created text
160     */
161    public function createReport($text, $metadata = [], $useContext = true)
162    {
163        // Store the current text for tool usage
164        $this->currentText = $text;
165
166        // Check if tools should be used based on configuration
167        $useTools = $this->getConf('use_tools', false);
168
169        // Only try to find template and add snippets if tools are not enabled
170        // When tools are enabled, the LLM will call get_template and get_examples as needed
171        if (!$useTools) {
172            // If no template is defined, try to find one using ChromaDB
173            if (empty($metadata['template'])) {
174                $templateResult = $this->queryChromaDBTemplate($text);
175                if (!empty($templateResult)) {
176                    // Use the first result as template
177                    $metadata['template'] = $templateResult[0];
178                }
179            }
180
181            // Query ChromaDB for relevant documents to use as examples
182            $chromaResults = $this->queryChromaDBSnippets($text, 10);
183
184            // Add ChromaDB results to metadata as snippets
185            if (!empty($chromaResults)) {
186                // Merge with existing snippets
187                $metadata['snippets'] = array_merge(
188                    isset($metadata['snippets']) ? $metadata['snippets'] : [],
189                    $chromaResults
190                );
191            }
192        }
193
194        $think = $this->think ? '/think' : '/no_think';
195        $prompt = $this->loadPrompt('create', ['text' => $text, 'think' => $think]);
196
197        return $this->callAPI('create', $prompt, $metadata, $useContext);
198    }
199
200    /**
201     * Compare two texts and highlight differences
202     *
203     * Sends a prompt to the LLM asking it to compare two texts and
204     * highlight their similarities and differences.
205     *
206     * @param string $text The current text to compare
207     * @param array $metadata Optional metadata containing template, examples, and previous report reference
208     * @return string The comparison results
209     */
210    public function compareText($text, $metadata = [], $useContext = false)
211    {
212        // Store the current text for tool usage
213        $this->currentText = $text;
214
215        // Load previous report from metadata if specified
216        $previousText = '';
217        if (!empty($metadata['previous_report_page'])) {
218            $previousText = $this->getPageContent($metadata['previous_report_page']);
219            if ($previousText === false) {
220                $previousText = '';
221            }
222        }
223
224        // Extract dates for placeholders
225        $currentDate = $this->getPageDate();
226        $previousDate = !empty($metadata['previous_report_page']) ?
227                        $this->getPageDate($metadata['previous_report_page']) :
228                        '';
229
230        $think = $this->think ? '/think' : '/no_think';
231        $prompt = $this->loadPrompt('compare', [
232            'text' => $text,
233            'previous_text' => $previousText,
234            'current_date' => $currentDate,
235            'previous_date' => $previousDate,
236            'think' => $think
237        ]);
238
239        return $this->callAPI('compare', $prompt, $metadata, $useContext);
240    }
241
242    /**
243     * Process text with a custom user prompt
244     *
245     * Sends a custom prompt to the LLM along with the provided text.
246     *
247     * @param string $text The text to process
248     * @param string $customPrompt The custom prompt to use
249     * @param array $metadata Optional metadata containing template and examples
250     * @param bool $useContext Whether to include template and examples in the context (default: true)
251     * @return string The processed text
252     */
253    public function processCustomPrompt($text, $metadata = [], $useContext = true)
254    {
255        // Store the current text for tool usage
256        $this->currentText = $text;
257
258        // Format the prompt with the text and custom prompt
259        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
260
261        return $this->callAPI('custom', $prompt, $metadata, $useContext);
262    }
263
264    /**
265     * Get the list of available tools for the LLM
266     *
267     * Defines the tools that can be used by the LLM during processing.
268     *
269     * @return array List of tool definitions
270     */
271    private function getAvailableTools()
272    {
273        return [
274            [
275                'type' => 'function',
276                'function' => [
277                    'name' => 'get_document',
278                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
279                    'parameters' => [
280                        'type' => 'object',
281                        'properties' => [
282                            'id' => [
283                                'type' => 'string',
284                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
285                            ]
286                        ],
287                        'required' => ['id']
288                    ]
289                ]
290            ],
291            [
292                'type' => 'function',
293                'function' => [
294                    'name' => 'get_template',
295                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
296                    'parameters' => [
297                        'type' => 'object',
298                        'properties' => [
299                            'language' => [
300                                'type' => 'string',
301                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
302                                'default' => 'ro'
303                            ]
304                        ]
305                    ]
306                ]
307            ],
308            [
309                'type' => 'function',
310                'function' => [
311                    'name' => 'get_examples',
312                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
313                    'parameters' => [
314                        'type' => 'object',
315                        'properties' => [
316                            'count' => [
317                                'type' => 'integer',
318                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
319                                'default' => 5
320                            ]
321                        ]
322                    ]
323                ]
324            ]
325        ];
326    }
327
328    /**
329     * Call the LLM API with the specified prompt
330     *
331     * Makes an HTTP POST request to the configured API endpoint with
332     * the prompt and other parameters. Handles authentication if an
333     * API key is configured.
334     *
335     * The method constructs a conversation with system and user messages,
336     * including context information from metadata when available.
337     *
338     * Complex logic includes:
339     * 1. Loading and enhancing the system prompt with metadata context
340     * 2. Building the API request with model parameters
341     * 3. Handling authentication with API key if configured
342     * 4. Making the HTTP request with proper error handling
343     * 5. Parsing and validating the API response
344     * 6. Supporting tool usage with automatic tool calling when enabled
345     * 7. Implementing context enhancement with templates, examples, and snippets
346     *
347     * The context information includes:
348     * - Template content: Used as a starting point for the response
349     * - Example pages: Full content of specified example pages
350     * - Text snippets: Relevant text examples from ChromaDB
351     *
352     * When tools are enabled, the method supports automatic tool calling:
353     * - Tools can retrieve documents, templates, and examples as needed
354     * - Tool responses are cached to avoid duplicate calls with identical parameters
355     * - Infinite loop protection prevents excessive tool calls
356     *
357     * @param string $command The command name for loading command-specific system prompts
358     * @param string $prompt The prompt to send to the LLM as user message
359     * @param array $metadata Optional metadata containing template, examples, and snippets
360     * @param bool $useContext Whether to include template and examples in the context (default: true)
361     * @return string The response content from the LLM
362     * @throws Exception If the API request fails or returns unexpected format
363     */
364
365    private function callAPI($command, $prompt, $metadata = [], $useContext = true)
366    {
367        // Load system prompt which provides general instructions to the LLM
368        $systemPrompt = $this->loadSystemPrompt($command, []);
369
370        // Enhance the prompt with context information from metadata
371        // This provides the LLM with additional context about templates and examples
372        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
373            $contextInfo = "\n\n<context>\n";
374
375            // Add template content if specified in metadata
376            if (!empty($metadata['template'])) {
377                $templateContent = $this->getPageContent($metadata['template']);
378                if ($templateContent !== false) {
379                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
380                }
381            }
382
383            // Add example pages content if specified in metadata
384            if (!empty($metadata['examples'])) {
385                $examplesContent = [];
386                foreach ($metadata['examples'] as $example) {
387                    $content = $this->getPageContent($example);
388                    if ($content !== false) {
389                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
390                    }
391                }
392                if (!empty($examplesContent)) {
393                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
394                }
395            }
396
397            // Add text snippets if specified in metadata
398            if (!empty($metadata['snippets'])) {
399                $snippetsContent = [];
400                foreach ($metadata['snippets'] as $index => $snippet) {
401                    // These are text snippets from ChromaDB
402                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
403                }
404                if (!empty($snippetsContent)) {
405                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
406                }
407            }
408
409            $contextInfo .= "\n</context>\n";
410
411            // Append context information to system prompt
412            $prompt = $contextInfo . "\n\n" . $prompt;
413        }
414
415        // Check if tools should be used based on configuration
416        $useTools = $this->getConf('use_tools', false);
417
418        // Prepare API request data with model parameters
419        $data = [
420            'model' => $this->model,
421            'messages' => [
422                ['role' => 'system', 'content' => $systemPrompt],
423                ['role' => 'user', 'content' => $prompt]
424            ],
425            'max_tokens' => 6144,
426            'stream' => false,
427            'keep_alive' => '30m',
428            'think' => true
429        ];
430
431        // Add tools to the request only if useTools is true
432        if ($useTools) {
433            // Define available tools
434            $data['tools'] = $this->getAvailableTools();
435            $data['tool_choice'] = 'auto';
436            $data['parallel_tool_calls'] = false;
437        }
438
439        // Only add parameters if they are defined and not null
440        if ($this->temperature !== null) {
441            $data['temperature'] = $this->temperature;
442        }
443        if ($this->top_p !== null) {
444            $data['top_p'] = $this->top_p;
445        }
446        if ($this->top_k !== null) {
447            $data['top_k'] = $this->top_k;
448        }
449        if ($this->min_p !== null) {
450            $data['min_p'] = $this->min_p;
451        }
452
453        // Make an API call with tool responses
454        return $this->callAPIWithTools($data, false);
455    }
456
457    /**
458     * Handle tool calls from the LLM
459     *
460     * Processes tool calls made by the LLM and returns appropriate responses.
461     * Implements caching to avoid duplicate calls with identical parameters.
462     *
463     * @param array $toolCall The tool call data from the LLM
464     * @return array The tool response message
465     */
466    private function handleToolCall($toolCall)
467    {
468        $toolName = $toolCall['function']['name'];
469        $arguments = json_decode($toolCall['function']['arguments'], true);
470
471        // Create a cache key from the tool name and arguments
472        $cacheKey = md5($toolName . serialize($arguments));
473
474        // Check if we have a cached result for this tool call
475        if (isset($this->toolCallCache[$cacheKey])) {
476            // Return cached result and indicate it was found in cache
477            $toolResponse = $this->toolCallCache[$cacheKey];
478            // Update with current tool call ID
479            $toolResponse['tool_call_id'] = $toolCall['id'];
480            $toolResponse['cached'] = true; // Indicate this response was cached
481            return $toolResponse;
482        }
483
484        $toolResponse = [
485            'role' => 'tool',
486            'tool_call_id' => $toolCall['id'],
487            'cached' => false // Indicate this is a fresh response
488        ];
489
490        switch ($toolName) {
491            case 'get_document':
492                $documentId = $arguments['id'];
493                $content = $this->getPageContent($documentId);
494                if ($content === false) {
495                    $toolResponse['content'] = 'Document not found: ' . $documentId;
496                } else {
497                    $toolResponse['content'] = $content;
498                }
499                break;
500
501            case 'get_template':
502                // Get template content using the convenience function
503                $toolResponse['content'] = $this->getTemplateContent();
504                break;
505
506            case 'get_examples':
507                // Get examples content using the convenience function
508                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
509                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
510                break;
511
512            default:
513                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
514        }
515
516        // Cache the result for future calls with the same parameters
517        $cacheEntry = $toolResponse;
518        // Remove tool_call_id and cached flag from cache as they change per call
519        unset($cacheEntry['tool_call_id']);
520        unset($cacheEntry['cached']);
521        $this->toolCallCache[$cacheKey] = $cacheEntry;
522
523        return $toolResponse;
524    }
525
526    /**
527     * Make an API call with tool responses
528     *
529     * Sends a follow-up request to the LLM with tool responses.
530     * Implements complex logic for handling tool calls with caching and loop protection.
531     *
532     * Complex logic includes:
533     * 1. Making HTTP requests with proper authentication and error handling
534     * 2. Processing tool calls from the LLM response
535     * 3. Caching tool responses to avoid duplicate calls with identical parameters
536     * 4. Tracking tool call counts to prevent infinite loops
537     * 5. Implementing loop protection with call count limits
538     * 6. Handling recursive tool calls until final content is generated
539     *
540     * Loop protection works by:
541     * - Tracking individual tool call counts (max 3 per tool)
542     * - Tracking total tool calls (max 10 total)
543     * - Disabling tools when limits are exceeded to break potential loops
544     *
545     * @param array $data The API request data including messages with tool responses
546     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
547     * @param bool $useTools Whether to process tool calls (used for loop protection)
548     * @return string The final response content
549     */
550    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
551    {
552        // Set up HTTP headers, including authentication if API key is configured
553        $headers = [
554            'Content-Type: application/json'
555        ];
556
557        if (!empty($this->api_key)) {
558            $headers[] = 'Authorization: Bearer ' . $this->api_key;
559        }
560
561       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
562        if ($toolsCalled) {
563            unset($data['tools']);
564            unset($data['tool_choice']);
565        }
566
567        // Initialize and configure cURL for the API request
568        $ch = curl_init();
569        curl_setopt($ch, CURLOPT_URL, $this->api_url);
570        curl_setopt($ch, CURLOPT_POST, true);
571        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
572        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
573        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
574        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
575        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
576
577        // Execute the API request
578        $response = curl_exec($ch);
579        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
580        $error = curl_error($ch);
581        curl_close($ch);
582
583        // Handle cURL errors
584        if ($error) {
585            throw new Exception('API request failed: ' . $error);
586        }
587
588        // Handle HTTP errors
589        if ($httpCode !== 200) {
590            throw new Exception('API request failed with HTTP code: ' . $httpCode);
591        }
592
593        // Parse and validate the JSON response
594        $result = json_decode($response, true);
595
596        // Extract the content from the response if available
597        if (isset($result['choices'][0]['message']['content'])) {
598            $content = trim($result['choices'][0]['message']['content']);
599            // Reset tool call counts when we get final content
600            $this->toolCallCounts = [];
601            return $content;
602        }
603
604        // Handle tool calls if present
605        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
606            $toolCalls = $result['choices'][0]['message']['tool_calls'];
607            // Start with original messages
608            $messages = $data['messages'];
609            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
610            $assistantMessage = [];
611            foreach ($result['choices'][0]['message'] as $key => $value) {
612                if ($key !== 'content') {
613                    $assistantMessage[$key] = $value;
614                }
615            }
616            // Add assistant's message with tool calls
617            $messages[] = $assistantMessage;
618
619            // Process each tool call and track counts to prevent infinite loops
620            foreach ($toolCalls as $toolCall) {
621                $toolName = $toolCall['function']['name'];
622                // Increment tool call count
623                if (!isset($this->toolCallCounts[$toolName])) {
624                    $this->toolCallCounts[$toolName] = 0;
625                }
626                $this->toolCallCounts[$toolName]++;
627
628                $toolResponse = $this->handleToolCall($toolCall);
629                $messages[] = $toolResponse;
630            }
631
632            // Check if any tool has been called more than 3 times
633            $toolsCalledCount = 0;
634            foreach ($this->toolCallCounts as $count) {
635                if ($count > 3) {
636                    // If any tool called more than 3 times, disable tools to break loop
637                    $toolsCalled = true;
638                    break;
639                }
640                $toolsCalledCount += $count;
641            }
642
643            // If total tool calls exceed 10, also disable tools
644            if ($toolsCalledCount > 10) {
645                $toolsCalled = true;
646            }
647
648            // Make another API call with tool responses
649            $data['messages'] = $messages;
650            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
651        }
652
653        // Throw exception for unexpected response format
654        throw new Exception('Unexpected API response format');
655    }
656
657    /**
658     * Load a prompt template from a DokuWiki page and replace placeholders
659     *
660     * Loads prompt templates from DokuWiki pages with IDs in the format
661     * dokullm:prompts:LANGUAGE:PROMPT_NAME
662     *
663     * The method implements a language fallback mechanism:
664     * 1. First tries to load the prompt in the configured language
665     * 2. If not found, falls back to English prompts
666     * 3. Throws an exception if neither is available
667     *
668     * After loading the prompt, it scans for placeholders and automatically
669     * adds missing ones with appropriate values before replacing all placeholders.
670     *
671     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
672     * @param array $variables Associative array of placeholder => value pairs
673     * @return string The processed prompt with placeholders replaced
674     * @throws Exception If the prompt page cannot be loaded in any language
675     */
676    private function loadPrompt($promptName, $variables = [])
677    {
678        $language = $this->getConf('language');
679
680        // Default to 'en' if language is 'default' or not set
681        if ($language === 'default' || empty($language)) {
682            $language = 'en';
683        }
684
685        // Construct the page ID for the prompt in the configured language
686        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
687
688        // Try to get the content of the prompt page in the configured language
689        $prompt = $this->getPageContent($promptPageId);
690
691        // If the language-specific prompt doesn't exist, try English as fallback
692        if ($prompt === false && $language !== 'en') {
693            $promptPageId = 'dokullm:prompts:en:' . $promptName;
694            $prompt = $this->getPageContent($promptPageId);
695        }
696
697        // If still no prompt found, throw an exception
698        if ($prompt === false) {
699            throw new Exception('Prompt page not found: ' . $promptPageId);
700        }
701
702        // Find placeholders in the prompt
703        $placeholders = $this->findPlaceholders($prompt);
704
705        // Add missing placeholders with appropriate values
706        foreach ($placeholders as $placeholder) {
707            // Skip if already provided in variables
708            if (isset($variables[$placeholder])) {
709                continue;
710            }
711
712            // Add appropriate values for specific placeholders
713            switch ($placeholder) {
714                case 'template':
715                    // If we have a page_template in variables, use it
716                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
717                    break;
718
719                case 'snippets':
720                    $variables[$placeholder] = $this->getSnippets(10);
721                    break;
722
723                case 'examples':
724                    // If we have example page IDs in metadata, add examples content
725                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
726                    break;
727
728                case 'previous':
729                    // If we have a previous report page ID in metadata, add previous content
730                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
731
732                    // Add current and previous dates to metadata
733                    $variables['current_date'] = $this->getPageDate();
734                    $variables['previous_date'] = !empty($variables['page_previous']) ?
735                                                $this->getPageDate($variables['page_previous']) :
736                                                '';
737                    break;
738
739                default:
740                    // For other placeholders, leave them empty or set a default value
741                    $variables[$placeholder] = '';
742                    break;
743            }
744        }
745
746        // Replace placeholders with actual values
747        // Placeholders are in the format {placeholder_name}
748        foreach ($variables as $placeholder => $value) {
749            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
750        }
751
752        // Return the processed prompt
753        return $prompt;
754    }
755
/**
 * Load system prompt with optional command-specific appendage
 *
 * Loads the main 'system' prompt and, when an action is given, tries to load
 * the prompt named '<action>:system' and append it on a new line.
 * A failure while loading the main prompt propagates to the caller; a failure
 * while loading the command-specific prompt is ignored so the main prompt can
 * still be used.
 *
 * @param string $action The action/command name
 * @param array $variables Associative array of placeholder => value pairs
 * @return string The combined system prompt
 */
private function loadSystemPrompt($action, $variables = [])
{
    // Load system prompt which provides general instructions to the LLM
    $systemPrompt = $this->loadPrompt('system', $variables);

    // Without an action there is nothing to append
    if (empty($action)) {
        return $systemPrompt;
    }

    try {
        $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
    } catch (\Throwable $e) {
        // The command-specific prompt is optional, so any failure to load it
        // is deliberately ignored.
        //
        // The class name must be fully qualified: this file is namespaced, so
        // a bare `Exception` resolves to a class inside the namespace and the
        // catch would never match. \Throwable (not just \Exception) is caught
        // because loadPrompt() raises through an unqualified `Exception`,
        // which surfaces as an \Error when no such namespaced class exists.
        // NOTE(review): narrow this to \Exception once loadPrompt() throws
        // a fully qualified exception.
        return $systemPrompt;
    }

    if ($commandSystemPrompt !== false) {
        $systemPrompt .= "\n" . $commandSystemPrompt;
    }

    return $systemPrompt;
}
786
/**
 * Read the raw text of a DokuWiki page
 *
 * Resolves the page ID to a file path via wikiFN() and returns the file's
 * contents when that file exists and is readable.
 *
 * @param string $pageId The page ID to retrieve
 * @return string|false The page content or false if not found/readable
 */
public function getPageContent($pageId)
{
    $path = wikiFN($pageId);

    // Nothing to read: report failure to the caller
    if (!file_exists($path) || !is_readable($path)) {
        return false;
    }

    return file_get_contents($path);
}
808
/**
 * Extract date from page ID or file timestamp
 *
 * Looks for a YYmmdd date in the page ID: a run of exactly six digits (not
 * embedded in a longer digit sequence) that forms a valid calendar date.
 * Two-digit years 00-69 are read as 20xx, 70-99 as 19xx. The first valid
 * candidate wins. If the ID holds no such date, the last modification time
 * of the page file is used instead.
 *
 * @param string $pageId Optional page ID to extract date from (defaults to current page)
 * @return string Formatted date string (YYYY-MM-DD), or '' if no date can be determined
 */
private function getPageDate($pageId = null)
{
    global $ID;

    // Use provided page ID or current page ID
    $targetPageId = $pageId ?: $ID;

    // The lookarounds keep the match from starting or ending inside a longer
    // number (e.g. the first six digits of an eight-digit YYYYmmdd value).
    $pattern = '/(?<!\d)(\d{2})(\d{2})(\d{2})(?!\d)/';
    if (preg_match_all($pattern, (string) $targetPageId, $candidates, PREG_SET_ORDER)) {
        foreach ($candidates as $candidate) {
            $shortYear = intval($candidate[1]);
            $month = intval($candidate[2]);
            $day = intval($candidate[3]);

            // Assume 20xx for years 00-69, 19xx for years 70-99
            $fullYear = $shortYear <= 69 ? 2000 + $shortYear : 1900 + $shortYear;

            // Skip digit runs that are not real dates (month 13, day 32, ...)
            if (checkdate($month, $day, $fullYear)) {
                return sprintf('%04d-%02d-%02d', $fullYear, $month, $day);
            }
        }
    }

    // Fallback to file timestamp
    $pageFile = wikiFN($targetPageId);
    if (file_exists($pageFile)) {
        $timestamp = filemtime($pageFile);
        // filemtime() returns false on failure; date() would turn that into 1970-01-01
        if ($timestamp !== false) {
            return date('Y-m-d', $timestamp);
        }
    }

    // Return empty string if no date can be determined
    return '';
}
848
/**
 * Accessor for the current text
 *
 * Returns the value held in the currentText property.
 *
 * @return string The current text
 */
private function getCurrentText()
{
    $text = $this->currentText;

    return $text;
}
860
/**
 * Scan text for placeholders
 *
 * Finds every occurrence of {placeholder_name} in the text, where the name
 * is any non-empty run of characters other than '}', and returns each
 * distinct name once, in order of first appearance.
 *
 * @param string $text The text to scan for placeholders
 * @return array Sequentially indexed list of unique placeholder names
 */
public function findPlaceholders($text)
{
    $pattern = '/\{([^}]+)\}/';

    if (!preg_match_all($pattern, $text, $matches)) {
        return [];
    }

    // array_unique() keeps the key of each first occurrence, which leaves
    // gaps after duplicates are dropped; reindex so the result is a real list.
    return array_values(array_unique($matches[1]));
}
882
/**
 * Get template content for the current text
 *
 * If a non-empty page ID is given and that page can be read, its content is
 * returned. Otherwise queryChromaDBTemplate() is asked for a template
 * matching the current text and the first suggested page is read.
 *
 * @param string|null $pageId Optional page ID to retrieve template from directly
 * @return string The template content, or '( no template )' if none could be loaded
 */
private function getTemplateContent($pageId = null)
{
    // Use the explicit page only when an ID was actually supplied; an empty
    // string cannot name a page, so skip the lookup for it.
    if ($pageId !== null && $pageId !== '') {
        $templateContent = $this->getPageContent($pageId);
        if ($templateContent !== false) {
            return $templateContent;
        }
    }

    // Otherwise, get template suggestion for the current text
    $suggestedIds = $this->queryChromaDBTemplate($this->getCurrentText());
    if (!empty($suggestedIds[0])) {
        $templateContent = $this->getPageContent($suggestedIds[0]);
        if ($templateContent !== false) {
            return $templateContent;
        }
    }

    return '( no template )';
}
913
/**
 * Get snippets content for the current text
 *
 * Fetches snippets for the current text through queryChromaDBSnippets() and
 * wraps each one in an <example id="N"> element (N starts at 1), with the
 * snippet on its own lines. Elements are joined with newlines.
 *
 * @param int $count Number of snippets to retrieve (default: 10)
 * @return string Formatted snippets, or '( no examples )' if none were returned
 */
private function getSnippets($count = 10)
{
    // Get example snippets for the current text
    $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
    if (empty($snippets)) {
        return '( no examples )';
    }

    $formattedSnippets = [];
    foreach ($snippets as $index => $snippet) {
        // "\n" must be double-quoted: in single quotes PHP emits a literal
        // backslash followed by 'n' instead of a line break.
        $formattedSnippets[] = '<example id="' . ($index + 1) . '">' . "\n"
            . $snippet . "\n"
            . '</example>';
    }

    return implode("\n", $formattedSnippets);
}
936
/**
 * Get examples content from example page IDs
 *
 * Reads each listed page and wraps its content in an
 * <example_page source="ID"> element, with the content on its own lines.
 * Pages that cannot be read are skipped. Elements are joined with newlines.
 *
 * @param array $exampleIds List of example page IDs
 * @return string Formatted examples content; '( no examples )' when the
 *                argument is empty or not an array; an empty string when
 *                none of the listed pages could be read
 */
private function getExamplesContent($exampleIds = [])
{
    if (empty($exampleIds) || !is_array($exampleIds)) {
        return '( no examples )';
    }

    $examplesContent = [];
    foreach ($exampleIds as $exampleId) {
        $content = $this->getPageContent($exampleId);
        if ($content === false) {
            continue;
        }

        // "\n" must be double-quoted: in single quotes PHP emits a literal
        // backslash followed by 'n' instead of a line break.
        $examplesContent[] = '<example_page source="' . $exampleId . '">' . "\n"
            . $content . "\n"
            . '</example_page>';
    }

    return implode("\n", $examplesContent);
}
962
/**
 * Fetch the text of a previous report page
 *
 * Returns the page content when the ID is non-empty and the page can be
 * read; otherwise returns one of two fixed placeholder messages telling
 * whether no ID was supplied or the page could not be loaded.
 *
 * @param string $previousId Previous page ID
 * @return string Previous report content or default message if not found
 */
private function getPreviousContent($previousId = '')
{
    if (!empty($previousId)) {
        $report = $this->getPageContent($previousId);

        return $report !== false ? $report : '( previous report not found )';
    }

    return '( no previous report )';
}
985
/**
 * Build a ChromaDB client and pick the collection to query
 *
 * Reads the chroma_* and ollama_* settings through getConf() (each with a
 * built-in default) and passes them to the ChromaDBClient constructor.
 * The collection name is the first ':'-separated segment of the current
 * page ID; the configured default collection is used when there is no
 * current page, when that segment is empty, or when it is 'playground'.
 *
 * @return array Two-element array: the ChromaDB client and the collection name
 */
private function getChromaDBClient()
{
    global $ID;

    // Connection settings from the plugin configuration
    $host = $this->getConf('chroma_host', 'localhost');
    $port = $this->getConf('chroma_port', 8000);
    $tenant = $this->getConf('chroma_tenant', 'dokullm');
    $database = $this->getConf('chroma_database', 'dokullm');
    $fallbackCollection = $this->getConf('chroma_collection', 'documents');
    $embedHost = $this->getConf('ollama_host', 'localhost');
    $embedPort = $this->getConf('ollama_port', 11434);
    $embedModel = $this->getConf('ollama_embeddings_model', 'nomic-embed-text');

    // Start from the default and override it with the top-level segment of
    // the page ID when that segment is usable
    $collection = $fallbackCollection;
    if (!empty($ID)) {
        $topSegment = explode(':', $ID)[0];
        if (!empty($topSegment) && $topSegment !== 'playground') {
            $collection = $topSegment;
        }
    }

    $client = new \dokuwiki\plugin\dokullm\ChromaDBClient(
        $host,
        $port,
        $tenant,
        $database,
        $embedHost,
        $embedPort,
        $embedModel
    );

    return [$client, $collection];
}
1038
/**
 * Query ChromaDB for relevant documents
 *
 * Obtains the client and collection name from getChromaDBClient(), runs
 * queryCollection() with the text as the single query, and returns the IDs
 * found under ['ids'][0] of the result. Any exception raised on the way is
 * logged and turned into an empty result so the calling operation can go on.
 *
 * @param string $text The text to find similar documents for
 * @param int $limit Maximum number of documents to retrieve (default: 5)
 * @param array|null $where Optional filter conditions for metadata
 * @return array List of document IDs (empty on failure or when nothing matched)
 */
private function queryChromaDB($text, $limit = 5, $where = null)
{
    try {
        // Get ChromaDB client and collection name
        list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

        // Query for similar documents
        $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

        // Use the ChromaDB IDs directly without conversion
        if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
            return array_values($results['ids'][0]);
        }

        return [];
    } catch (\Exception $e) {
        // Fully qualified on purpose: this file is namespaced, so a bare
        // `Exception` would name a class inside the namespace and this
        // handler would never run.
        // Log error but don't fail the operation
        error_log('ChromaDB query failed: ' . $e->getMessage());
        return [];
    }
}
1074
/**
 * Query ChromaDB for relevant documents and return text snippets
 *
 * Obtains the client and collection name from getChromaDBClient(), runs
 * queryCollection() with the text as the single query, and returns the
 * document texts found under ['documents'][0] of the result instead of IDs.
 * Any exception raised on the way is logged and turned into an empty result.
 *
 * @param string $text The text to find similar documents for
 * @param int $limit Maximum number of documents to retrieve (default: 10)
 * @param array|null $where Optional filter conditions for metadata
 * @return array List of text snippets (empty on failure or when nothing matched)
 */
private function queryChromaDBSnippets($text, $limit = 10, $where = null)
{
    try {
        // Get ChromaDB client and collection name
        list($chromaClient, $chromaCollection) = $this->getChromaDBClient();

        // Query for similar documents
        $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

        // Extract document texts from results
        if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
            return array_values($results['documents'][0]);
        }

        return [];
    } catch (\Exception $e) {
        // Fully qualified on purpose: this file is namespaced, so a bare
        // `Exception` would name a class inside the namespace and this
        // handler would never run.
        // Log error but don't fail the operation
        error_log('ChromaDB query failed: ' . $e->getMessage());
        return [];
    }
}
1109
/**
 * Look up a template document for the given text
 *
 * Runs queryChromaDB() for a single result, filtered on the metadata
 * condition 'type' => 'template'. A trailing chunk marker ("@" followed by
 * digits, e.g. "@2") is removed from the returned ID to get the base
 * document ID.
 *
 * @param string $text The text to find a template for
 * @return array List of template document IDs (maximum 1)
 */
public function queryChromaDBTemplate($text)
{
    $matches = $this->queryChromaDB($text, 1, ['type' => 'template']);

    // No hit: hand back the empty result unchanged
    if (empty($matches)) {
        return $matches;
    }

    $matches[0] = preg_replace('/@\\d+$/', '', $matches[0]);

    return $matches;
}
1130
1131}
1132