xref: /plugin/dokullm/LlmClient.php (revision 852801abb9ac65fb5f5c69ba6720db3f8ce92017)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4/**
5 * LLM Client for the dokullm plugin
6 *
7 * This class provides methods to interact with an LLM API for various
8 * text processing tasks such as completion, rewriting, grammar correction,
9 * summarization, conclusion creation, text analysis, and custom prompts.
10 *
11 * The client handles:
12 * - API configuration and authentication
13 * - Prompt template loading and processing
14 * - Context-aware requests with metadata
15 * - DokuWiki page content retrieval
16 */
17
18// must be run within Dokuwiki
19if (!defined('DOKU_INC')) {
20    die();
21}
22
23/**
24 * LLM Client class for handling API communications
25 *
26 * Manages configuration settings and provides methods for various
27 * text processing operations through an LLM API.
28 * Implements caching for tool calls to avoid duplicate processing.
29 */
30class LlmClient
31{
32    /** @var string The API endpoint URL */
33    private $api_url;
34
35    /** @var array Cache for tool call results */
36    private $toolCallCache = [];
37
38    /** @var string Current text for tool usage */
39    private $currentText = '';
40
41    /** @var array Track tool call counts to prevent infinite loops */
42    private $toolCallCounts = [];
43
44    /** @var string The API authentication key */
45    private $api_key;
46
47    /** @var string The model identifier to use */
48    private $model;
49
50    /** @var int The request timeout in seconds */
51    private $timeout;
52
53    /** @var float The temperature setting for response randomness */
54    private $temperature;
55
56    /** @var float The top-p setting for nucleus sampling */
57    private $top_p;
58
59    /** @var int The top-k setting for token selection */
60    private $top_k;
61
62    /** @var float The min-p setting for minimum probability threshold */
63    private $min_p;
64
65    /** @var bool Whether to enable thinking in the LLM responses */
66    private $think;
67
68    /**
69     * Initialize the LLM client with configuration settings
70     *
71     * Retrieves configuration values from DokuWiki's configuration system
72     * for API URL, key, model, timeout, and LLM sampling parameters.
73     *
74     * Configuration values:
75     * - api_url: The LLM API endpoint URL
76     * - api_key: Authentication key for the API (optional)
77     * - model: The model identifier to use for requests
78     * - timeout: Request timeout in seconds
79     * - language: Language code for prompt templates
80     * - temperature: Temperature setting for response randomness (0.0-1.0)
81     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
82     * - top_k: Top-k setting (integer >= 1)
83     * - min_p: Minimum probability threshold (0.0-1.0)
84     * - think: Whether to enable thinking in LLM responses (boolean)
85     */
86    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $language = null)
87    {
88        $this->api_url = $api_url;
89        $this->api_key = $api_key;
90        $this->model = $model;
91        $this->timeout = $timeout;
92        $this->temperature = $temperature;
93        $this->top_p = $top_p;
94        $this->top_k = $top_k;
95        $this->min_p = $min_p;
96        $this->think = $think;
97        $this->language = $language;
98    }
99
100
101
102    public function process($action, $text, $metadata = [], $useContext = true)
103    {
104        // Store the current text for tool usage
105        $this->currentText = $text;
106
107        // Add text, think and action to metadata
108        $metadata['text'] = $text;
109        $metadata['think'] = $this->think ? '/think' : '/no_think';
110        $metadata['action'] = $action;
111
112        // If we have 'template' in metadata, move it to 'page_template'
113        if (isset($metadata['template'])) {
114            $metadata['page_template'] = $metadata['template'];
115            unset($metadata['template']);
116        }
117
118        // If we have 'examples' in metadata, move it to 'page_examples'
119        if (isset($metadata['examples'])) {
120            $metadata['page_examples'] = $metadata['examples'];
121            unset($metadata['examples']);
122        }
123
124        // If we have 'previous' in metadata, move it to 'page_previous'
125        if (isset($metadata['previous'])) {
126            $metadata['page_previous'] = $metadata['previous'];
127            unset($metadata['previous']);
128        }
129
130        $prompt = $this->loadPrompt($action, $this->language, $metadata);
131
132        return $this->callAPI($action, $this->language, $prompt, $metadata, $useContext);
133    }
134
135
136
137    /**
138     * Create the provided text using the LLM
139     *
140     * Sends a prompt to the LLM asking it to create the given text.
141     * First queries ChromaDB for relevant documents to include as examples.
142     * If no template is defined, queries ChromaDB for a template.
143     *
144     * @param string $text The text to create
145     * @param array $metadata Optional metadata containing template, examples, and snippets
146     * @param bool $useContext Whether to include template and examples in the context (default: true)
147     * @return string The created text
148     */
149    public function createReport($text, $metadata = [], $useContext = true, $useTools = false)
150    {
151        // Store the current text for tool usage
152        $this->currentText = $text;
153
154        // Only try to find template and add snippets if tools are not enabled
155        // When tools are enabled, the LLM will call get_template and get_examples as needed
156        if (!$useTools) {
157            // If no template is defined, try to find one using ChromaDB
158            if (empty($metadata['template'])) {
159                $templateResult = $this->queryChromaDBTemplate($text);
160                if (!empty($templateResult)) {
161                    // Use the first result as template
162                    $metadata['template'] = $templateResult[0];
163                }
164            }
165
166            // Query ChromaDB for relevant documents to use as examples
167            $chromaResults = $this->queryChromaDBSnippets($text, 10);
168
169            // Add ChromaDB results to metadata as snippets
170            if (!empty($chromaResults)) {
171                // Merge with existing snippets
172                $metadata['snippets'] = array_merge(
173                    isset($metadata['snippets']) ? $metadata['snippets'] : [],
174                    $chromaResults
175                );
176            }
177        }
178
179        $think = $this->think ? '/think' : '/no_think';
180        $prompt = $this->loadPrompt('create', ['text' => $text, 'think' => $think]);
181
182        return $this->callAPI('create', $this->language, $prompt, $metadata, $useContext);
183    }
184
185    /**
186     * Compare two texts and highlight differences
187     *
188     * Sends a prompt to the LLM asking it to compare two texts and
189     * highlight their similarities and differences.
190     *
191     * @param string $text The current text to compare
192     * @param array $metadata Optional metadata containing template, examples, and previous report reference
193     * @return string The comparison results
194     */
195    public function compareText($text, $metadata = [], $useContext = false)
196    {
197        // Store the current text for tool usage
198        $this->currentText = $text;
199
200        // Load previous report from metadata if specified
201        $previousText = '';
202        if (!empty($metadata['previous_report_page'])) {
203            $previousText = $this->getPageContent($metadata['previous_report_page']);
204            if ($previousText === false) {
205                $previousText = '';
206            }
207        }
208
209        // Extract dates for placeholders
210        $currentDate = $this->getPageDate();
211        $previousDate = !empty($metadata['previous_report_page']) ?
212                        $this->getPageDate($metadata['previous_report_page']) :
213                        '';
214
215        $think = $this->think ? '/think' : '/no_think';
216        $prompt = $this->loadPrompt('compare', [
217            'text' => $text,
218            'previous_text' => $previousText,
219            'current_date' => $currentDate,
220            'previous_date' => $previousDate,
221            'think' => $think
222        ]);
223
224        return $this->callAPI('compare', $this->language, $prompt, $metadata, $useContext);
225    }
226
227    /**
228     * Process text with a custom user prompt
229     *
230     * Sends a custom prompt to the LLM along with the provided text.
231     *
232     * @param string $text The text to process
233     * @param string $customPrompt The custom prompt to use
234     * @param array $metadata Optional metadata containing template and examples
235     * @param bool $useContext Whether to include template and examples in the context (default: true)
236     * @return string The processed text
237     */
238    public function processCustomPrompt($text, $metadata = [], $useContext = true)
239    {
240        // Store the current text for tool usage
241        $this->currentText = $text;
242
243        // Format the prompt with the text and custom prompt
244        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
245
246        return $this->callAPI('custom', $this->language, $prompt, $metadata, $useContext);
247    }
248
249    /**
250     * Get the list of available tools for the LLM
251     *
252     * Defines the tools that can be used by the LLM during processing.
253     *
254     * @return array List of tool definitions
255     */
256    private function getAvailableTools()
257    {
258        return [
259            [
260                'type' => 'function',
261                'function' => [
262                    'name' => 'get_document',
263                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
264                    'parameters' => [
265                        'type' => 'object',
266                        'properties' => [
267                            'id' => [
268                                'type' => 'string',
269                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
270                            ]
271                        ],
272                        'required' => ['id']
273                    ]
274                ]
275            ],
276            [
277                'type' => 'function',
278                'function' => [
279                    'name' => 'get_template',
280                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
281                    'parameters' => [
282                        'type' => 'object',
283                        'properties' => [
284                            'language' => [
285                                'type' => 'string',
286                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
287                                'default' => 'ro'
288                            ]
289                        ]
290                    ]
291                ]
292            ],
293            [
294                'type' => 'function',
295                'function' => [
296                    'name' => 'get_examples',
297                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
298                    'parameters' => [
299                        'type' => 'object',
300                        'properties' => [
301                            'count' => [
302                                'type' => 'integer',
303                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
304                                'default' => 5
305                            ]
306                        ]
307                    ]
308                ]
309            ]
310        ];
311    }
312
313    /**
314     * Call the LLM API with the specified prompt
315     *
316     * Makes an HTTP POST request to the configured API endpoint with
317     * the prompt and other parameters. Handles authentication if an
318     * API key is configured.
319     *
320     * The method constructs a conversation with system and user messages,
321     * including context information from metadata when available.
322     *
323     * Complex logic includes:
324     * 1. Loading and enhancing the system prompt with metadata context
325     * 2. Building the API request with model parameters
326     * 3. Handling authentication with API key if configured
327     * 4. Making the HTTP request with proper error handling
328     * 5. Parsing and validating the API response
329     * 6. Supporting tool usage with automatic tool calling when enabled
330     * 7. Implementing context enhancement with templates, examples, and snippets
331     *
332     * The context information includes:
333     * - Template content: Used as a starting point for the response
334     * - Example pages: Full content of specified example pages
335     * - Text snippets: Relevant text examples from ChromaDB
336     *
337     * When tools are enabled, the method supports automatic tool calling:
338     * - Tools can retrieve documents, templates, and examples as needed
339     * - Tool responses are cached to avoid duplicate calls with identical parameters
340     * - Infinite loop protection prevents excessive tool calls
341     *
342     * @param string $command The command name for loading command-specific system prompts
343     * @param string $prompt The prompt to send to the LLM as user message
344     * @param array $metadata Optional metadata containing template, examples, and snippets
345     * @param bool $useContext Whether to include template and examples in the context (default: true)
346     * @return string The response content from the LLM
347     * @throws Exception If the API request fails or returns unexpected format
348     */
349
350    private function callAPI($command, $language, $prompt, $metadata = [], $useContext = true, $useTools = false)
351    {
352        // Load system prompt which provides general instructions to the LLM
353        $systemPrompt = $this->loadSystemPrompt($command, $language, []);
354
355        // Enhance the prompt with context information from metadata
356        // This provides the LLM with additional context about templates and examples
357        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
358            $contextInfo = "\n\n<context>\n";
359
360            // Add template content if specified in metadata
361            if (!empty($metadata['template'])) {
362                $templateContent = $this->getPageContent($metadata['template']);
363                if ($templateContent !== false) {
364                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
365                }
366            }
367
368            // Add example pages content if specified in metadata
369            if (!empty($metadata['examples'])) {
370                $examplesContent = [];
371                foreach ($metadata['examples'] as $example) {
372                    $content = $this->getPageContent($example);
373                    if ($content !== false) {
374                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
375                    }
376                }
377                if (!empty($examplesContent)) {
378                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
379                }
380            }
381
382            // Add text snippets if specified in metadata
383            if (!empty($metadata['snippets'])) {
384                $snippetsContent = [];
385                foreach ($metadata['snippets'] as $index => $snippet) {
386                    // These are text snippets from ChromaDB
387                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
388                }
389                if (!empty($snippetsContent)) {
390                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
391                }
392            }
393
394            $contextInfo .= "\n</context>\n";
395
396            // Append context information to system prompt
397            $prompt = $contextInfo . "\n\n" . $prompt;
398        }
399
400        // Prepare API request data with model parameters
401        $data = [
402            'model' => $this->model,
403            'messages' => [
404                ['role' => 'system', 'content' => $systemPrompt],
405                ['role' => 'user', 'content' => $prompt]
406            ],
407            'max_tokens' => 6144,
408            'stream' => false,
409            'keep_alive' => '30m',
410            'think' => true
411        ];
412
413        // Add tools to the request only if useTools is true
414        if ($useTools) {
415            // Define available tools
416            $data['tools'] = $this->getAvailableTools();
417            $data['tool_choice'] = 'auto';
418            $data['parallel_tool_calls'] = false;
419        }
420
421        // Only add parameters if they are defined and not null
422        if ($this->temperature !== null) {
423            $data['temperature'] = $this->temperature;
424        }
425        if ($this->top_p !== null) {
426            $data['top_p'] = $this->top_p;
427        }
428        if ($this->top_k !== null) {
429            $data['top_k'] = $this->top_k;
430        }
431        if ($this->min_p !== null) {
432            $data['min_p'] = $this->min_p;
433        }
434
435        // Make an API call with tool responses
436        return $this->callAPIWithTools($data, false);
437    }
438
439    /**
440     * Handle tool calls from the LLM
441     *
442     * Processes tool calls made by the LLM and returns appropriate responses.
443     * Implements caching to avoid duplicate calls with identical parameters.
444     *
445     * @param array $toolCall The tool call data from the LLM
446     * @return array The tool response message
447     */
448    private function handleToolCall($toolCall)
449    {
450        $toolName = $toolCall['function']['name'];
451        $arguments = json_decode($toolCall['function']['arguments'], true);
452
453        // Create a cache key from the tool name and arguments
454        $cacheKey = md5($toolName . serialize($arguments));
455
456        // Check if we have a cached result for this tool call
457        if (isset($this->toolCallCache[$cacheKey])) {
458            // Return cached result and indicate it was found in cache
459            $toolResponse = $this->toolCallCache[$cacheKey];
460            // Update with current tool call ID
461            $toolResponse['tool_call_id'] = $toolCall['id'];
462            $toolResponse['cached'] = true; // Indicate this response was cached
463            return $toolResponse;
464        }
465
466        $toolResponse = [
467            'role' => 'tool',
468            'tool_call_id' => $toolCall['id'],
469            'cached' => false // Indicate this is a fresh response
470        ];
471
472        switch ($toolName) {
473            case 'get_document':
474                $documentId = $arguments['id'];
475                $content = $this->getPageContent($documentId);
476                if ($content === false) {
477                    $toolResponse['content'] = 'Document not found: ' . $documentId;
478                } else {
479                    $toolResponse['content'] = $content;
480                }
481                break;
482
483            case 'get_template':
484                // Get template content using the convenience function
485                $toolResponse['content'] = $this->getTemplateContent();
486                break;
487
488            case 'get_examples':
489                // Get examples content using the convenience function
490                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
491                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
492                break;
493
494            default:
495                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
496        }
497
498        // Cache the result for future calls with the same parameters
499        $cacheEntry = $toolResponse;
500        // Remove tool_call_id and cached flag from cache as they change per call
501        unset($cacheEntry['tool_call_id']);
502        unset($cacheEntry['cached']);
503        $this->toolCallCache[$cacheKey] = $cacheEntry;
504
505        return $toolResponse;
506    }
507
508    /**
509     * Make an API call with tool responses
510     *
511     * Sends a follow-up request to the LLM with tool responses.
512     * Implements complex logic for handling tool calls with caching and loop protection.
513     *
514     * Complex logic includes:
515     * 1. Making HTTP requests with proper authentication and error handling
516     * 2. Processing tool calls from the LLM response
517     * 3. Caching tool responses to avoid duplicate calls with identical parameters
518     * 4. Tracking tool call counts to prevent infinite loops
519     * 5. Implementing loop protection with call count limits
520     * 6. Handling recursive tool calls until final content is generated
521     *
522     * Loop protection works by:
523     * - Tracking individual tool call counts (max 3 per tool)
524     * - Tracking total tool calls (max 10 total)
525     * - Disabling tools when limits are exceeded to break potential loops
526     *
527     * @param array $data The API request data including messages with tool responses
528     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
529     * @param bool $useTools Whether to process tool calls (used for loop protection)
530     * @return string The final response content
531     */
532    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
533    {
534        // Set up HTTP headers, including authentication if API key is configured
535        $headers = [
536            'Content-Type: application/json'
537        ];
538
539        if (!empty($this->api_key)) {
540            $headers[] = 'Authorization: Bearer ' . $this->api_key;
541        }
542
543       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
544        if ($toolsCalled) {
545            unset($data['tools']);
546            unset($data['tool_choice']);
547        }
548
549        // Initialize and configure cURL for the API request
550        $ch = curl_init();
551        curl_setopt($ch, CURLOPT_URL, $this->api_url);
552        curl_setopt($ch, CURLOPT_POST, true);
553        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
554        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
555        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
556        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
557        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
558
559        // Execute the API request
560        $response = curl_exec($ch);
561        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
562        $error = curl_error($ch);
563        curl_close($ch);
564
565        // Handle cURL errors
566        if ($error) {
567            throw new Exception('API request failed: ' . $error);
568        }
569
570        // Handle HTTP errors
571        if ($httpCode !== 200) {
572            throw new Exception('API request failed with HTTP code: ' . $httpCode);
573        }
574
575        // Parse and validate the JSON response
576        $result = json_decode($response, true);
577
578        // Extract the content from the response if available
579        if (isset($result['choices'][0]['message']['content'])) {
580            $content = trim($result['choices'][0]['message']['content']);
581            // Reset tool call counts when we get final content
582            $this->toolCallCounts = [];
583            return $content;
584        }
585
586        // Handle tool calls if present
587        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
588            $toolCalls = $result['choices'][0]['message']['tool_calls'];
589            // Start with original messages
590            $messages = $data['messages'];
591            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
592            $assistantMessage = [];
593            foreach ($result['choices'][0]['message'] as $key => $value) {
594                if ($key !== 'content') {
595                    $assistantMessage[$key] = $value;
596                }
597            }
598            // Add assistant's message with tool calls
599            $messages[] = $assistantMessage;
600
601            // Process each tool call and track counts to prevent infinite loops
602            foreach ($toolCalls as $toolCall) {
603                $toolName = $toolCall['function']['name'];
604                // Increment tool call count
605                if (!isset($this->toolCallCounts[$toolName])) {
606                    $this->toolCallCounts[$toolName] = 0;
607                }
608                $this->toolCallCounts[$toolName]++;
609
610                $toolResponse = $this->handleToolCall($toolCall);
611                $messages[] = $toolResponse;
612            }
613
614            // Check if any tool has been called more than 3 times
615            $toolsCalledCount = 0;
616            foreach ($this->toolCallCounts as $count) {
617                if ($count > 3) {
618                    // If any tool called more than 3 times, disable tools to break loop
619                    $toolsCalled = true;
620                    break;
621                }
622                $toolsCalledCount += $count;
623            }
624
625            // If total tool calls exceed 10, also disable tools
626            if ($toolsCalledCount > 10) {
627                $toolsCalled = true;
628            }
629
630            // Make another API call with tool responses
631            $data['messages'] = $messages;
632            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
633        }
634
635        // Throw exception for unexpected response format
636        throw new Exception('Unexpected API response format');
637    }
638
639    /**
640     * Load a prompt template from a DokuWiki page and replace placeholders
641     *
642     * Loads prompt templates from DokuWiki pages with IDs in the format
643     * dokullm:prompts:LANGUAGE:PROMPT_NAME
644     *
645     * The method implements a language fallback mechanism:
646     * 1. First tries to load the prompt in the configured language
647     * 2. If not found, falls back to English prompts
648     * 3. Throws an exception if neither is available
649     *
650     * After loading the prompt, it scans for placeholders and automatically
651     * adds missing ones with appropriate values before replacing all placeholders.
652     *
653     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
654     * @param array $variables Associative array of placeholder => value pairs
655     * @return string The processed prompt with placeholders replaced
656     * @throws Exception If the prompt page cannot be loaded in any language
657     */
658    private function loadPrompt($promptName, $language, $variables = [])
659    {
660        // Default to 'en' if language is 'default' or not set
661        if ($language === 'default' || empty($language)) {
662            $language = 'en';
663        }
664
665        // Construct the page ID for the prompt in the configured language
666        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
667
668        // Try to get the content of the prompt page in the configured language
669        $prompt = $this->getPageContent($promptPageId);
670
671        // If the language-specific prompt doesn't exist, try English as fallback
672        if ($prompt === false && $language !== 'en') {
673            $promptPageId = 'dokullm:prompts:en:' . $promptName;
674            $prompt = $this->getPageContent($promptPageId);
675        }
676
677        // If still no prompt found, throw an exception
678        if ($prompt === false) {
679            throw new Exception('Prompt page not found: ' . $promptPageId);
680        }
681
682        // Find placeholders in the prompt
683        $placeholders = $this->findPlaceholders($prompt);
684
685        // Add missing placeholders with appropriate values
686        foreach ($placeholders as $placeholder) {
687            // Skip if already provided in variables
688            if (isset($variables[$placeholder])) {
689                continue;
690            }
691
692            // Add appropriate values for specific placeholders
693            switch ($placeholder) {
694                case 'template':
695                    // If we have a page_template in variables, use it
696                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
697                    break;
698
699                case 'snippets':
700                    $variables[$placeholder] = $this->getSnippets(10);
701                    break;
702
703                case 'examples':
704                    // If we have example page IDs in metadata, add examples content
705                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
706                    break;
707
708                case 'previous':
709                    // If we have a previous report page ID in metadata, add previous content
710                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
711
712                    // Add current and previous dates to metadata
713                    $variables['current_date'] = $this->getPageDate();
714                    $variables['previous_date'] = !empty($variables['page_previous']) ?
715                                                $this->getPageDate($variables['page_previous']) :
716                                                '';
717                    break;
718
719                default:
720                    // For other placeholders, leave them empty or set a default value
721                    $variables[$placeholder] = '';
722                    break;
723            }
724        }
725
726        // Replace placeholders with actual values
727        // Placeholders are in the format {placeholder_name}
728        foreach ($variables as $placeholder => $value) {
729            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
730        }
731
732        // Return the processed prompt
733        return $prompt;
734    }
735
736    /**
737     * Load system prompt with optional command-specific appendage
738     *
739     * Loads the main system prompt and appends any command-specific system prompt
740     * if available.
741     *
742     * @param string $action The action/command name
743     * @param array $variables Associative array of placeholder => value pairs
744     * @return string The combined system prompt
745     */
746    private function loadSystemPrompt($action, $language, $variables = [])
747    {
748        // Load system prompt which provides general instructions to the LLM
749        $systemPrompt = $this->loadPrompt('system', $language, $variables);
750
751        // Check if there's a command-specific system prompt appendage
752        if (!empty($action)) {
753            try {
754                $commandSystemPrompt = $this->loadPrompt($action . ':system', $language, $variables);
755                if ($commandSystemPrompt !== false) {
756                    $systemPrompt .= "\n" . $commandSystemPrompt;
757                }
758            } catch (Exception $e) {
759                // Ignore exceptions when loading command-specific system prompt
760                // This allows the main system prompt to still be used
761            }
762        }
763
764        return $systemPrompt;
765    }
766
767    /**
768     * Get the content of a DokuWiki page
769     *
770     * Retrieves the raw content of a DokuWiki page by its ID.
771     * Used for loading template and example page content for context.
772     *
773     * @param string $pageId The page ID to retrieve
774     * @return string|false The page content or false if not found/readable
775     */
776    public function getPageContent($pageId)
777    {
778        // Convert page ID to file path
779        $pageFile = wikiFN($pageId);
780
781        // Check if file exists and is readable
782        if (file_exists($pageFile) && is_readable($pageFile)) {
783            return file_get_contents($pageFile);
784        }
785
786        return false;
787    }
788
789    /**
790     * Extract date from page ID or file timestamp
791     *
792     * Attempts to extract a date in YYmmdd format from the page ID.
793     * If not found, uses the file's last modification timestamp.
794     *
795     * @param string $pageId Optional page ID to extract date from (defaults to current page)
796     * @return string Formatted date string (YYYY-MM-DD)
797     */
798    private function getPageDate($pageId = null)
799    {
800        global $ID;
801
802        // Use provided page ID or current page ID
803        $targetPageId = $pageId ?: $ID;
804
805        // Try to extract date from page ID (looking for YYmmdd pattern)
806        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
807            // Convert YYmmdd to YYYY-MM-DD
808            $year = $matches[1];
809            $month = $matches[2];
810            $day = $matches[3];
811
812            // Assume 20xx for years 00-69, 19xx for years 70-99
813            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
814
815            return $fullYear . '-' . $month . '-' . $day;
816        }
817
818        // Fallback to file timestamp
819        $pageFile = wikiFN($targetPageId);
820        if (file_exists($pageFile)) {
821            $timestamp = filemtime($pageFile);
822            return date('Y-m-d', $timestamp);
823        }
824
825        // Return empty string if no date can be determined
826        return '';
827    }
828
829    /**
830     * Get current text
831     *
832     * Retrieves the current text stored from the process function.
833     *
834     * @return string The current text
835     */
836    private function getCurrentText()
837    {
838        return $this->currentText;
839    }
840
841    /**
842     * Scan text for placeholders
843     *
844     * Finds all placeholders in the format {placeholder_name} in the provided text
845     * and returns an array of unique placeholder names.
846     *
847     * @param string $text The text to scan for placeholders
848     * @return array List of unique placeholder names found in the text
849     */
850    public function findPlaceholders($text)
851    {
852        $placeholders = [];
853        $pattern = '/\{([^}]+)\}/';
854
855        if (preg_match_all($pattern, $text, $matches)) {
856            // Get unique placeholder names
857            $placeholders = array_unique($matches[1]);
858        }
859
860        return $placeholders;
861    }
862
863    /**
864     * Get template content for the current text
865     *
866     * Convenience function to retrieve template content. If a pageId is provided,
867     * retrieves content directly from that page. Otherwise, queries ChromaDB for
868     * a relevant template based on the current text.
869     *
870     * @param string|null $pageId Optional page ID to retrieve template from directly
871     * @return string The template content or empty string if not found
872     */
873    private function getTemplateContent($pageId = null)
874    {
875        // If pageId is provided, use it directly
876        if ($pageId !== null) {
877            $templateContent = $this->getPageContent($pageId);
878            if ($templateContent !== false) {
879                return $templateContent;
880            }
881        }
882
883        // Otherwise, get template suggestion for the current text
884        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
885        if (!empty($pageId)) {
886            $templateContent = $this->getPageContent($pageId[0]);
887            if ($templateContent !== false) {
888                return $templateContent;
889            }
890        }
891        return '( no template )';
892    }
893
894    /**
895     * Get snippets content for the current text
896     *
897     * Convenience function to retrieve relevant snippets for the current text.
898     * Queries ChromaDB for relevant snippets and returns them formatted.
899     *
900     * @param int $count Number of snippets to retrieve (default: 10)
901     * @return string Formatted snippets content or empty string if not found
902     */
903    private function getSnippets($count = 10)
904    {
905        // Get example snippets for the current text
906        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
907        if (!empty($snippets)) {
908            $formattedSnippets = [];
909            foreach ($snippets as $index => $snippet) {
910                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
911            }
912            return implode("\n", $formattedSnippets);
913        }
914        return '( no examples )';
915    }
916
917    /**
918     * Get examples content from example page IDs
919     *
920     * Convenience function to retrieve content from example pages.
921     * Returns the content of each page packed in XML elements.
922     *
923     * @param array $exampleIds List of example page IDs
924     * @return string Formatted examples content or empty string if not found
925     */
926    private function getExamplesContent($exampleIds = [])
927    {
928        if (empty($exampleIds) || !is_array($exampleIds)) {
929            return '( no examples )';
930        }
931
932        $examplesContent = [];
933        foreach ($exampleIds as $index => $exampleId) {
934            $content = $this->getPageContent($exampleId);
935            if ($content !== false) {
936                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
937            }
938        }
939
940        return implode("\n", $examplesContent);
941    }
942
943    /**
944     * Get previous report content from previous page ID
945     *
946     * Convenience function to retrieve content from a previous report page.
947     * Returns the content of the previous page or a default message if not found.
948     *
949     * @param string $previousId Previous page ID
950     * @return string Previous report content or default message if not found
951     */
952    private function getPreviousContent($previousId = '')
953    {
954        if (empty($previousId)) {
955            return '( no previous report )';
956        }
957
958        $content = $this->getPageContent($previousId);
959        if ($content !== false) {
960            return $content;
961        }
962
963        return '( previous report not found )';
964    }
965
966    /**
967     * Get ChromaDB client with configuration
968     *
969     * Creates and returns a ChromaDB client with the appropriate configuration.
970     * Extracts modality from the current page ID to use as the collection name.
971     *
972     * @return array Array containing the ChromaDB client and collection name
973     */
974    private function getChromaDBClient()
975    {
976        // Get ChromaDB configuration from DokuWiki plugin configuration
977        $chromaHost = $this->getConf('chroma_host', 'localhost');
978        $chromaPort = $this->getConf('chroma_port', 8000);
979        $chromaTenant = $this->getConf('chroma_tenant', 'dokullm');
980        $chromaDatabase = $this->getConf('chroma_database', 'dokullm');
981        $chromaDefaultCollection = $this->getConf('chroma_collection', 'documents');
982        $ollamaHost = $this->getConf('ollama_host', 'localhost');
983        $ollamaPort = $this->getConf('ollama_port', 11434);
984        $ollamaModel = $this->getConf('ollama_embeddings_model', 'nomic-embed-text');
985
986        // Use the first part of the current page ID as collection name, fallback to default
987        global $ID;
988        $chromaCollection = $chromaDefaultCollection; // Default collection name
989
990        if (!empty($ID)) {
991            // Split the page ID by ':' and take the first part as collection name
992            $parts = explode(':', $ID);
993            if (isset($parts[0]) && !empty($parts[0])) {
994                // If the first part is 'playground', use the default collection
995                // Otherwise, use the first part as the collection name
996                if ($parts[0] === 'playground') {
997                    $chromaCollection = $chromaDefaultCollection;
998                } else {
999                    $chromaCollection = $parts[0];
1000                }
1001            }
1002        }
1003
1004        // Create ChromaDB client with all required parameters
1005        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
1006            $chromaHost,
1007            $chromaPort,
1008            $chromaTenant,
1009            $chromaDatabase,
1010            $ollamaHost,
1011            $ollamaPort,
1012            $ollamaModel
1013        );
1014
1015
1016        return [$chromaClient, $chromaCollection];
1017    }
1018
1019    /**
1020     * Query ChromaDB for relevant documents
1021     *
1022     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1023     * Extracts modality from the current page ID to use as the collection name.
1024     *
1025     * @param string $text The text to find similar documents for
1026     * @param int $limit Maximum number of documents to retrieve (default: 5)
1027     * @param array|null $where Optional filter conditions for metadata
1028     * @return array List of document IDs
1029     */
1030    private function queryChromaDB($text, $limit = 5, $where = null)
1031    {
1032        try {
1033            // Get ChromaDB client and collection name
1034            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1035            // Query for similar documents
1036            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1037
1038            // Extract document IDs from results
1039            $documentIds = [];
1040            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
1041                foreach ($results['ids'][0] as $id) {
1042                    // Use the ChromaDB ID directly without conversion
1043                    $documentIds[] = $id;
1044                }
1045            }
1046
1047            return $documentIds;
1048        } catch (Exception $e) {
1049            // Log error but don't fail the operation
1050            error_log('ChromaDB query failed: ' . $e->getMessage());
1051            return [];
1052        }
1053    }
1054
1055    /**
1056     * Query ChromaDB for relevant documents and return text snippets
1057     *
1058     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1059     * Returns the actual text snippets instead of document IDs.
1060     *
1061     * @param string $text The text to find similar documents for
1062     * @param int $limit Maximum number of documents to retrieve (default: 10)
1063     * @param array|null $where Optional filter conditions for metadata
1064     * @return array List of text snippets
1065     */
1066    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
1067    {
1068        try {
1069            // Get ChromaDB client and collection name
1070            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1071            // Query for similar documents
1072            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1073
1074            // Extract document texts from results
1075            $snippets = [];
1076            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
1077                foreach ($results['documents'][0] as $document) {
1078                    $snippets[] = $document;
1079                }
1080            }
1081
1082            return $snippets;
1083        } catch (Exception $e) {
1084            // Log error but don't fail the operation
1085            error_log('ChromaDB query failed: ' . $e->getMessage());
1086            return [];
1087        }
1088    }
1089
1090    /**
1091     * Query ChromaDB for a template document
1092     *
1093     * Generates embeddings for the input text and queries ChromaDB for a template document
1094     * by filtering with metadata 'template=true'.
1095     *
1096     * @param string $text The text to find a template for
1097     * @return array List of template document IDs (maximum 1)
1098     */
1099    public function queryChromaDBTemplate($text)
1100    {
1101        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1102
1103        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1104        if (!empty($templateIds)) {
1105            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1106        }
1107
1108        return $templateIds;
1109    }
1110
1111}
1112