xref: /plugin/dokullm/LlmClient.php (revision 15cead4bb0560e870c60c084d307ef2a2b780008)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4/**
5 * LLM Client for the dokullm plugin
6 *
7 * This class provides methods to interact with an LLM API for various
8 * text processing tasks such as completion, rewriting, grammar correction,
9 * summarization, conclusion creation, text analysis, and custom prompts.
10 *
11 * The client handles:
12 * - API configuration and authentication
13 * - Prompt template loading and processing
14 * - Context-aware requests with metadata
15 * - DokuWiki page content retrieval
16 */
17
// Refuse to execute when the file is loaded outside of DokuWiki
defined('DOKU_INC') || die();
22
23/**
24 * LLM Client class for handling API communications
25 *
26 * Manages configuration settings and provides methods for various
27 * text processing operations through an LLM API.
28 * Implements caching for tool calls to avoid duplicate processing.
29 */
30class LlmClient
31{
32    /** @var string The API endpoint URL */
33    private $api_url;
34
35    /** @var array Cache for tool call results */
36    private $toolCallCache = [];
37
38    /** @var string Current text for tool usage */
39    private $currentText = '';
40
41    /** @var array Track tool call counts to prevent infinite loops */
42    private $toolCallCounts = [];
43
44    /** @var string The API authentication key */
45    private $api_key;
46
47    /** @var string The model identifier to use */
48    private $model;
49
50    /** @var int The request timeout in seconds */
51    private $timeout;
52
53    /** @var float The temperature setting for response randomness */
54    private $temperature;
55
56    /** @var float The top-p setting for nucleus sampling */
57    private $top_p;
58
59    /** @var int The top-k setting for token selection */
60    private $top_k;
61
62    /** @var float The min-p setting for minimum probability threshold */
63    private $min_p;
64
65    /** @var bool Whether to enable thinking in the LLM responses */
66    private $think;
67
68    /**
69     * Initialize the LLM client with configuration settings
70     *
71     * Retrieves configuration values from DokuWiki's configuration system
72     * for API URL, key, model, timeout, and LLM sampling parameters.
73     *
74     * Configuration values:
75     * - api_url: The LLM API endpoint URL
76     * - api_key: Authentication key for the API (optional)
77     * - model: The model identifier to use for requests
78     * - timeout: Request timeout in seconds
79     * - language: Language code for prompt templates
80     * - temperature: Temperature setting for response randomness (0.0-1.0)
81     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
82     * - top_k: Top-k setting (integer >= 1)
83     * - min_p: Minimum probability threshold (0.0-1.0)
84     * - think: Whether to enable thinking in LLM responses (boolean)
85     */
86    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null)
87    {
88        $this->api_url = $api_url;
89        $this->api_key = $api_key;
90        $this->model = $model;
91        $this->timeout = $timeout;
92        $this->temperature = $temperature;
93        $this->top_p = $top_p;
94        $this->top_k = $top_k;
95        $this->min_p = $min_p;
96        $this->think = $think;
97    }
98
99
100
101    public function process($action, $language, $text, $metadata = [], $useContext = true)
102    {
103        // Store the current text for tool usage
104        $this->currentText = $text;
105
106        // Add text, think and action to metadata
107        $metadata['text'] = $text;
108        $metadata['think'] = $this->think ? '/think' : '/no_think';
109        $metadata['action'] = $action;
110
111        // If we have 'template' in metadata, move it to 'page_template'
112        if (isset($metadata['template'])) {
113            $metadata['page_template'] = $metadata['template'];
114            unset($metadata['template']);
115        }
116
117        // If we have 'examples' in metadata, move it to 'page_examples'
118        if (isset($metadata['examples'])) {
119            $metadata['page_examples'] = $metadata['examples'];
120            unset($metadata['examples']);
121        }
122
123        // If we have 'previous' in metadata, move it to 'page_previous'
124        if (isset($metadata['previous'])) {
125            $metadata['page_previous'] = $metadata['previous'];
126            unset($metadata['previous']);
127        }
128
129        $prompt = $this->loadPrompt($action, $language, $metadata);
130
131        return $this->callAPI($action, $language, $prompt, $metadata, $useContext);
132    }
133
134
135
136    /**
137     * Create the provided text using the LLM
138     *
139     * Sends a prompt to the LLM asking it to create the given text.
140     * First queries ChromaDB for relevant documents to include as examples.
141     * If no template is defined, queries ChromaDB for a template.
142     *
143     * @param string $text The text to create
144     * @param array $metadata Optional metadata containing template, examples, and snippets
145     * @param bool $useContext Whether to include template and examples in the context (default: true)
146     * @return string The created text
147     */
148    public function createReport($text, $metadata = [], $useContext = true, $useTools = false)
149    {
150        // Store the current text for tool usage
151        $this->currentText = $text;
152
153        // Only try to find template and add snippets if tools are not enabled
154        // When tools are enabled, the LLM will call get_template and get_examples as needed
155        if (!$useTools) {
156            // If no template is defined, try to find one using ChromaDB
157            if (empty($metadata['template'])) {
158                $templateResult = $this->queryChromaDBTemplate($text);
159                if (!empty($templateResult)) {
160                    // Use the first result as template
161                    $metadata['template'] = $templateResult[0];
162                }
163            }
164
165            // Query ChromaDB for relevant documents to use as examples
166            $chromaResults = $this->queryChromaDBSnippets($text, 10);
167
168            // Add ChromaDB results to metadata as snippets
169            if (!empty($chromaResults)) {
170                // Merge with existing snippets
171                $metadata['snippets'] = array_merge(
172                    isset($metadata['snippets']) ? $metadata['snippets'] : [],
173                    $chromaResults
174                );
175            }
176        }
177
178        $think = $this->think ? '/think' : '/no_think';
179        $prompt = $this->loadPrompt('create', ['text' => $text, 'think' => $think]);
180
181        return $this->callAPI('create', $language, $prompt, $metadata, $useContext);
182    }
183
184    /**
185     * Compare two texts and highlight differences
186     *
187     * Sends a prompt to the LLM asking it to compare two texts and
188     * highlight their similarities and differences.
189     *
190     * @param string $text The current text to compare
191     * @param array $metadata Optional metadata containing template, examples, and previous report reference
192     * @return string The comparison results
193     */
194    public function compareText($text, $metadata = [], $useContext = false)
195    {
196        // Store the current text for tool usage
197        $this->currentText = $text;
198
199        // Load previous report from metadata if specified
200        $previousText = '';
201        if (!empty($metadata['previous_report_page'])) {
202            $previousText = $this->getPageContent($metadata['previous_report_page']);
203            if ($previousText === false) {
204                $previousText = '';
205            }
206        }
207
208        // Extract dates for placeholders
209        $currentDate = $this->getPageDate();
210        $previousDate = !empty($metadata['previous_report_page']) ?
211                        $this->getPageDate($metadata['previous_report_page']) :
212                        '';
213
214        $think = $this->think ? '/think' : '/no_think';
215        $prompt = $this->loadPrompt('compare', [
216            'text' => $text,
217            'previous_text' => $previousText,
218            'current_date' => $currentDate,
219            'previous_date' => $previousDate,
220            'think' => $think
221        ]);
222
223        return $this->callAPI('compare', $language, $prompt, $metadata, $useContext);
224    }
225
226    /**
227     * Process text with a custom user prompt
228     *
229     * Sends a custom prompt to the LLM along with the provided text.
230     *
231     * @param string $text The text to process
232     * @param string $customPrompt The custom prompt to use
233     * @param array $metadata Optional metadata containing template and examples
234     * @param bool $useContext Whether to include template and examples in the context (default: true)
235     * @return string The processed text
236     */
237    public function processCustomPrompt($text, $metadata = [], $useContext = true)
238    {
239        // Store the current text for tool usage
240        $this->currentText = $text;
241
242        // Format the prompt with the text and custom prompt
243        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
244
245        return $this->callAPI('custom', $language, $prompt, $metadata, $useContext);
246    }
247
248    /**
249     * Get the list of available tools for the LLM
250     *
251     * Defines the tools that can be used by the LLM during processing.
252     *
253     * @return array List of tool definitions
254     */
255    private function getAvailableTools()
256    {
257        return [
258            [
259                'type' => 'function',
260                'function' => [
261                    'name' => 'get_document',
262                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
263                    'parameters' => [
264                        'type' => 'object',
265                        'properties' => [
266                            'id' => [
267                                'type' => 'string',
268                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
269                            ]
270                        ],
271                        'required' => ['id']
272                    ]
273                ]
274            ],
275            [
276                'type' => 'function',
277                'function' => [
278                    'name' => 'get_template',
279                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
280                    'parameters' => [
281                        'type' => 'object',
282                        'properties' => [
283                            'language' => [
284                                'type' => 'string',
285                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
286                                'default' => 'ro'
287                            ]
288                        ]
289                    ]
290                ]
291            ],
292            [
293                'type' => 'function',
294                'function' => [
295                    'name' => 'get_examples',
296                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
297                    'parameters' => [
298                        'type' => 'object',
299                        'properties' => [
300                            'count' => [
301                                'type' => 'integer',
302                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
303                                'default' => 5
304                            ]
305                        ]
306                    ]
307                ]
308            ]
309        ];
310    }
311
312    /**
313     * Call the LLM API with the specified prompt
314     *
315     * Makes an HTTP POST request to the configured API endpoint with
316     * the prompt and other parameters. Handles authentication if an
317     * API key is configured.
318     *
319     * The method constructs a conversation with system and user messages,
320     * including context information from metadata when available.
321     *
322     * Complex logic includes:
323     * 1. Loading and enhancing the system prompt with metadata context
324     * 2. Building the API request with model parameters
325     * 3. Handling authentication with API key if configured
326     * 4. Making the HTTP request with proper error handling
327     * 5. Parsing and validating the API response
328     * 6. Supporting tool usage with automatic tool calling when enabled
329     * 7. Implementing context enhancement with templates, examples, and snippets
330     *
331     * The context information includes:
332     * - Template content: Used as a starting point for the response
333     * - Example pages: Full content of specified example pages
334     * - Text snippets: Relevant text examples from ChromaDB
335     *
336     * When tools are enabled, the method supports automatic tool calling:
337     * - Tools can retrieve documents, templates, and examples as needed
338     * - Tool responses are cached to avoid duplicate calls with identical parameters
339     * - Infinite loop protection prevents excessive tool calls
340     *
341     * @param string $command The command name for loading command-specific system prompts
342     * @param string $prompt The prompt to send to the LLM as user message
343     * @param array $metadata Optional metadata containing template, examples, and snippets
344     * @param bool $useContext Whether to include template and examples in the context (default: true)
345     * @return string The response content from the LLM
346     * @throws Exception If the API request fails or returns unexpected format
347     */
348
349    private function callAPI($command, $language, $prompt, $metadata = [], $useContext = true, $useTools = false)
350    {
351        // Load system prompt which provides general instructions to the LLM
352        $systemPrompt = $this->loadSystemPrompt($command, $language, []);
353
354        // Enhance the prompt with context information from metadata
355        // This provides the LLM with additional context about templates and examples
356        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
357            $contextInfo = "\n\n<context>\n";
358
359            // Add template content if specified in metadata
360            if (!empty($metadata['template'])) {
361                $templateContent = $this->getPageContent($metadata['template']);
362                if ($templateContent !== false) {
363                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
364                }
365            }
366
367            // Add example pages content if specified in metadata
368            if (!empty($metadata['examples'])) {
369                $examplesContent = [];
370                foreach ($metadata['examples'] as $example) {
371                    $content = $this->getPageContent($example);
372                    if ($content !== false) {
373                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
374                    }
375                }
376                if (!empty($examplesContent)) {
377                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
378                }
379            }
380
381            // Add text snippets if specified in metadata
382            if (!empty($metadata['snippets'])) {
383                $snippetsContent = [];
384                foreach ($metadata['snippets'] as $index => $snippet) {
385                    // These are text snippets from ChromaDB
386                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
387                }
388                if (!empty($snippetsContent)) {
389                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
390                }
391            }
392
393            $contextInfo .= "\n</context>\n";
394
395            // Append context information to system prompt
396            $prompt = $contextInfo . "\n\n" . $prompt;
397        }
398
399        // Prepare API request data with model parameters
400        $data = [
401            'model' => $this->model,
402            'messages' => [
403                ['role' => 'system', 'content' => $systemPrompt],
404                ['role' => 'user', 'content' => $prompt]
405            ],
406            'max_tokens' => 6144,
407            'stream' => false,
408            'keep_alive' => '30m',
409            'think' => true
410        ];
411
412        // Add tools to the request only if useTools is true
413        if ($useTools) {
414            // Define available tools
415            $data['tools'] = $this->getAvailableTools();
416            $data['tool_choice'] = 'auto';
417            $data['parallel_tool_calls'] = false;
418        }
419
420        // Only add parameters if they are defined and not null
421        if ($this->temperature !== null) {
422            $data['temperature'] = $this->temperature;
423        }
424        if ($this->top_p !== null) {
425            $data['top_p'] = $this->top_p;
426        }
427        if ($this->top_k !== null) {
428            $data['top_k'] = $this->top_k;
429        }
430        if ($this->min_p !== null) {
431            $data['min_p'] = $this->min_p;
432        }
433
434        // Make an API call with tool responses
435        return $this->callAPIWithTools($data, false);
436    }
437
438    /**
439     * Handle tool calls from the LLM
440     *
441     * Processes tool calls made by the LLM and returns appropriate responses.
442     * Implements caching to avoid duplicate calls with identical parameters.
443     *
444     * @param array $toolCall The tool call data from the LLM
445     * @return array The tool response message
446     */
447    private function handleToolCall($toolCall)
448    {
449        $toolName = $toolCall['function']['name'];
450        $arguments = json_decode($toolCall['function']['arguments'], true);
451
452        // Create a cache key from the tool name and arguments
453        $cacheKey = md5($toolName . serialize($arguments));
454
455        // Check if we have a cached result for this tool call
456        if (isset($this->toolCallCache[$cacheKey])) {
457            // Return cached result and indicate it was found in cache
458            $toolResponse = $this->toolCallCache[$cacheKey];
459            // Update with current tool call ID
460            $toolResponse['tool_call_id'] = $toolCall['id'];
461            $toolResponse['cached'] = true; // Indicate this response was cached
462            return $toolResponse;
463        }
464
465        $toolResponse = [
466            'role' => 'tool',
467            'tool_call_id' => $toolCall['id'],
468            'cached' => false // Indicate this is a fresh response
469        ];
470
471        switch ($toolName) {
472            case 'get_document':
473                $documentId = $arguments['id'];
474                $content = $this->getPageContent($documentId);
475                if ($content === false) {
476                    $toolResponse['content'] = 'Document not found: ' . $documentId;
477                } else {
478                    $toolResponse['content'] = $content;
479                }
480                break;
481
482            case 'get_template':
483                // Get template content using the convenience function
484                $toolResponse['content'] = $this->getTemplateContent();
485                break;
486
487            case 'get_examples':
488                // Get examples content using the convenience function
489                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
490                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
491                break;
492
493            default:
494                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
495        }
496
497        // Cache the result for future calls with the same parameters
498        $cacheEntry = $toolResponse;
499        // Remove tool_call_id and cached flag from cache as they change per call
500        unset($cacheEntry['tool_call_id']);
501        unset($cacheEntry['cached']);
502        $this->toolCallCache[$cacheKey] = $cacheEntry;
503
504        return $toolResponse;
505    }
506
507    /**
508     * Make an API call with tool responses
509     *
510     * Sends a follow-up request to the LLM with tool responses.
511     * Implements complex logic for handling tool calls with caching and loop protection.
512     *
513     * Complex logic includes:
514     * 1. Making HTTP requests with proper authentication and error handling
515     * 2. Processing tool calls from the LLM response
516     * 3. Caching tool responses to avoid duplicate calls with identical parameters
517     * 4. Tracking tool call counts to prevent infinite loops
518     * 5. Implementing loop protection with call count limits
519     * 6. Handling recursive tool calls until final content is generated
520     *
521     * Loop protection works by:
522     * - Tracking individual tool call counts (max 3 per tool)
523     * - Tracking total tool calls (max 10 total)
524     * - Disabling tools when limits are exceeded to break potential loops
525     *
526     * @param array $data The API request data including messages with tool responses
527     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
528     * @param bool $useTools Whether to process tool calls (used for loop protection)
529     * @return string The final response content
530     */
531    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
532    {
533        // Set up HTTP headers, including authentication if API key is configured
534        $headers = [
535            'Content-Type: application/json'
536        ];
537
538        if (!empty($this->api_key)) {
539            $headers[] = 'Authorization: Bearer ' . $this->api_key;
540        }
541
542       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
543        if ($toolsCalled) {
544            unset($data['tools']);
545            unset($data['tool_choice']);
546        }
547
548        // Initialize and configure cURL for the API request
549        $ch = curl_init();
550        curl_setopt($ch, CURLOPT_URL, $this->api_url);
551        curl_setopt($ch, CURLOPT_POST, true);
552        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
553        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
554        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
555        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
556        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
557
558        // Execute the API request
559        $response = curl_exec($ch);
560        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
561        $error = curl_error($ch);
562        curl_close($ch);
563
564        // Handle cURL errors
565        if ($error) {
566            throw new Exception('API request failed: ' . $error);
567        }
568
569        // Handle HTTP errors
570        if ($httpCode !== 200) {
571            throw new Exception('API request failed with HTTP code: ' . $httpCode);
572        }
573
574        // Parse and validate the JSON response
575        $result = json_decode($response, true);
576
577        // Extract the content from the response if available
578        if (isset($result['choices'][0]['message']['content'])) {
579            $content = trim($result['choices'][0]['message']['content']);
580            // Reset tool call counts when we get final content
581            $this->toolCallCounts = [];
582            return $content;
583        }
584
585        // Handle tool calls if present
586        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
587            $toolCalls = $result['choices'][0]['message']['tool_calls'];
588            // Start with original messages
589            $messages = $data['messages'];
590            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
591            $assistantMessage = [];
592            foreach ($result['choices'][0]['message'] as $key => $value) {
593                if ($key !== 'content') {
594                    $assistantMessage[$key] = $value;
595                }
596            }
597            // Add assistant's message with tool calls
598            $messages[] = $assistantMessage;
599
600            // Process each tool call and track counts to prevent infinite loops
601            foreach ($toolCalls as $toolCall) {
602                $toolName = $toolCall['function']['name'];
603                // Increment tool call count
604                if (!isset($this->toolCallCounts[$toolName])) {
605                    $this->toolCallCounts[$toolName] = 0;
606                }
607                $this->toolCallCounts[$toolName]++;
608
609                $toolResponse = $this->handleToolCall($toolCall);
610                $messages[] = $toolResponse;
611            }
612
613            // Check if any tool has been called more than 3 times
614            $toolsCalledCount = 0;
615            foreach ($this->toolCallCounts as $count) {
616                if ($count > 3) {
617                    // If any tool called more than 3 times, disable tools to break loop
618                    $toolsCalled = true;
619                    break;
620                }
621                $toolsCalledCount += $count;
622            }
623
624            // If total tool calls exceed 10, also disable tools
625            if ($toolsCalledCount > 10) {
626                $toolsCalled = true;
627            }
628
629            // Make another API call with tool responses
630            $data['messages'] = $messages;
631            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
632        }
633
634        // Throw exception for unexpected response format
635        throw new Exception('Unexpected API response format');
636    }
637
638    /**
639     * Load a prompt template from a DokuWiki page and replace placeholders
640     *
641     * Loads prompt templates from DokuWiki pages with IDs in the format
642     * dokullm:prompts:LANGUAGE:PROMPT_NAME
643     *
644     * The method implements a language fallback mechanism:
645     * 1. First tries to load the prompt in the configured language
646     * 2. If not found, falls back to English prompts
647     * 3. Throws an exception if neither is available
648     *
649     * After loading the prompt, it scans for placeholders and automatically
650     * adds missing ones with appropriate values before replacing all placeholders.
651     *
652     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
653     * @param array $variables Associative array of placeholder => value pairs
654     * @return string The processed prompt with placeholders replaced
655     * @throws Exception If the prompt page cannot be loaded in any language
656     */
657    private function loadPrompt($promptName, $language, $variables = [])
658    {
659        // Default to 'en' if language is 'default' or not set
660        if ($language === 'default' || empty($language)) {
661            $language = 'en';
662        }
663
664        // Construct the page ID for the prompt in the configured language
665        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
666
667        // Try to get the content of the prompt page in the configured language
668        $prompt = $this->getPageContent($promptPageId);
669
670        // If the language-specific prompt doesn't exist, try English as fallback
671        if ($prompt === false && $language !== 'en') {
672            $promptPageId = 'dokullm:prompts:en:' . $promptName;
673            $prompt = $this->getPageContent($promptPageId);
674        }
675
676        // If still no prompt found, throw an exception
677        if ($prompt === false) {
678            throw new Exception('Prompt page not found: ' . $promptPageId);
679        }
680
681        // Find placeholders in the prompt
682        $placeholders = $this->findPlaceholders($prompt);
683
684        // Add missing placeholders with appropriate values
685        foreach ($placeholders as $placeholder) {
686            // Skip if already provided in variables
687            if (isset($variables[$placeholder])) {
688                continue;
689            }
690
691            // Add appropriate values for specific placeholders
692            switch ($placeholder) {
693                case 'template':
694                    // If we have a page_template in variables, use it
695                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
696                    break;
697
698                case 'snippets':
699                    $variables[$placeholder] = $this->getSnippets(10);
700                    break;
701
702                case 'examples':
703                    // If we have example page IDs in metadata, add examples content
704                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
705                    break;
706
707                case 'previous':
708                    // If we have a previous report page ID in metadata, add previous content
709                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
710
711                    // Add current and previous dates to metadata
712                    $variables['current_date'] = $this->getPageDate();
713                    $variables['previous_date'] = !empty($variables['page_previous']) ?
714                                                $this->getPageDate($variables['page_previous']) :
715                                                '';
716                    break;
717
718                default:
719                    // For other placeholders, leave them empty or set a default value
720                    $variables[$placeholder] = '';
721                    break;
722            }
723        }
724
725        // Replace placeholders with actual values
726        // Placeholders are in the format {placeholder_name}
727        foreach ($variables as $placeholder => $value) {
728            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
729        }
730
731        // Return the processed prompt
732        return $prompt;
733    }
734
735    /**
736     * Load system prompt with optional command-specific appendage
737     *
738     * Loads the main system prompt and appends any command-specific system prompt
739     * if available.
740     *
741     * @param string $action The action/command name
742     * @param array $variables Associative array of placeholder => value pairs
743     * @return string The combined system prompt
744     */
745    private function loadSystemPrompt($action, $language, $variables = [])
746    {
747        // Load system prompt which provides general instructions to the LLM
748        $systemPrompt = $this->loadPrompt('system', $language, $variables);
749
750        // Check if there's a command-specific system prompt appendage
751        if (!empty($action)) {
752            try {
753                $commandSystemPrompt = $this->loadPrompt($action . ':system', $language, $variables);
754                if ($commandSystemPrompt !== false) {
755                    $systemPrompt .= "\n" . $commandSystemPrompt;
756                }
757            } catch (Exception $e) {
758                // Ignore exceptions when loading command-specific system prompt
759                // This allows the main system prompt to still be used
760            }
761        }
762
763        return $systemPrompt;
764    }
765
766    /**
767     * Get the content of a DokuWiki page
768     *
769     * Retrieves the raw content of a DokuWiki page by its ID.
770     * Used for loading template and example page content for context.
771     *
772     * @param string $pageId The page ID to retrieve
773     * @return string|false The page content or false if not found/readable
774     */
775    public function getPageContent($pageId)
776    {
777        // Convert page ID to file path
778        $pageFile = wikiFN($pageId);
779
780        // Check if file exists and is readable
781        if (file_exists($pageFile) && is_readable($pageFile)) {
782            return file_get_contents($pageFile);
783        }
784
785        return false;
786    }
787
788    /**
789     * Extract date from page ID or file timestamp
790     *
791     * Attempts to extract a date in YYmmdd format from the page ID.
792     * If not found, uses the file's last modification timestamp.
793     *
794     * @param string $pageId Optional page ID to extract date from (defaults to current page)
795     * @return string Formatted date string (YYYY-MM-DD)
796     */
797    private function getPageDate($pageId = null)
798    {
799        global $ID;
800
801        // Use provided page ID or current page ID
802        $targetPageId = $pageId ?: $ID;
803
804        // Try to extract date from page ID (looking for YYmmdd pattern)
805        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
806            // Convert YYmmdd to YYYY-MM-DD
807            $year = $matches[1];
808            $month = $matches[2];
809            $day = $matches[3];
810
811            // Assume 20xx for years 00-69, 19xx for years 70-99
812            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
813
814            return $fullYear . '-' . $month . '-' . $day;
815        }
816
817        // Fallback to file timestamp
818        $pageFile = wikiFN($targetPageId);
819        if (file_exists($pageFile)) {
820            $timestamp = filemtime($pageFile);
821            return date('Y-m-d', $timestamp);
822        }
823
824        // Return empty string if no date can be determined
825        return '';
826    }
827
828    /**
829     * Get current text
830     *
831     * Retrieves the current text stored from the process function.
832     *
833     * @return string The current text
834     */
835    private function getCurrentText()
836    {
837        return $this->currentText;
838    }
839
840    /**
841     * Scan text for placeholders
842     *
843     * Finds all placeholders in the format {placeholder_name} in the provided text
844     * and returns an array of unique placeholder names.
845     *
846     * @param string $text The text to scan for placeholders
847     * @return array List of unique placeholder names found in the text
848     */
849    public function findPlaceholders($text)
850    {
851        $placeholders = [];
852        $pattern = '/\{([^}]+)\}/';
853
854        if (preg_match_all($pattern, $text, $matches)) {
855            // Get unique placeholder names
856            $placeholders = array_unique($matches[1]);
857        }
858
859        return $placeholders;
860    }
861
862    /**
863     * Get template content for the current text
864     *
865     * Convenience function to retrieve template content. If a pageId is provided,
866     * retrieves content directly from that page. Otherwise, queries ChromaDB for
867     * a relevant template based on the current text.
868     *
869     * @param string|null $pageId Optional page ID to retrieve template from directly
870     * @return string The template content or empty string if not found
871     */
872    private function getTemplateContent($pageId = null)
873    {
874        // If pageId is provided, use it directly
875        if ($pageId !== null) {
876            $templateContent = $this->getPageContent($pageId);
877            if ($templateContent !== false) {
878                return $templateContent;
879            }
880        }
881
882        // Otherwise, get template suggestion for the current text
883        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
884        if (!empty($pageId)) {
885            $templateContent = $this->getPageContent($pageId[0]);
886            if ($templateContent !== false) {
887                return $templateContent;
888            }
889        }
890        return '( no template )';
891    }
892
893    /**
894     * Get snippets content for the current text
895     *
896     * Convenience function to retrieve relevant snippets for the current text.
897     * Queries ChromaDB for relevant snippets and returns them formatted.
898     *
899     * @param int $count Number of snippets to retrieve (default: 10)
900     * @return string Formatted snippets content or empty string if not found
901     */
902    private function getSnippets($count = 10)
903    {
904        // Get example snippets for the current text
905        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
906        if (!empty($snippets)) {
907            $formattedSnippets = [];
908            foreach ($snippets as $index => $snippet) {
909                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
910            }
911            return implode("\n", $formattedSnippets);
912        }
913        return '( no examples )';
914    }
915
916    /**
917     * Get examples content from example page IDs
918     *
919     * Convenience function to retrieve content from example pages.
920     * Returns the content of each page packed in XML elements.
921     *
922     * @param array $exampleIds List of example page IDs
923     * @return string Formatted examples content or empty string if not found
924     */
925    private function getExamplesContent($exampleIds = [])
926    {
927        if (empty($exampleIds) || !is_array($exampleIds)) {
928            return '( no examples )';
929        }
930
931        $examplesContent = [];
932        foreach ($exampleIds as $index => $exampleId) {
933            $content = $this->getPageContent($exampleId);
934            if ($content !== false) {
935                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
936            }
937        }
938
939        return implode("\n", $examplesContent);
940    }
941
942    /**
943     * Get previous report content from previous page ID
944     *
945     * Convenience function to retrieve content from a previous report page.
946     * Returns the content of the previous page or a default message if not found.
947     *
948     * @param string $previousId Previous page ID
949     * @return string Previous report content or default message if not found
950     */
951    private function getPreviousContent($previousId = '')
952    {
953        if (empty($previousId)) {
954            return '( no previous report )';
955        }
956
957        $content = $this->getPageContent($previousId);
958        if ($content !== false) {
959            return $content;
960        }
961
962        return '( previous report not found )';
963    }
964
965    /**
966     * Get ChromaDB client with configuration
967     *
968     * Creates and returns a ChromaDB client with the appropriate configuration.
969     * Extracts modality from the current page ID to use as the collection name.
970     *
971     * @return array Array containing the ChromaDB client and collection name
972     */
973    private function getChromaDBClient()
974    {
975        // Get ChromaDB configuration from DokuWiki plugin configuration
976        $chromaHost = $this->getConf('chroma_host', 'localhost');
977        $chromaPort = $this->getConf('chroma_port', 8000);
978        $chromaTenant = $this->getConf('chroma_tenant', 'dokullm');
979        $chromaDatabase = $this->getConf('chroma_database', 'dokullm');
980        $chromaDefaultCollection = $this->getConf('chroma_collection', 'documents');
981        $ollamaHost = $this->getConf('ollama_host', 'localhost');
982        $ollamaPort = $this->getConf('ollama_port', 11434);
983        $ollamaModel = $this->getConf('ollama_embeddings_model', 'nomic-embed-text');
984
985        // Use the first part of the current page ID as collection name, fallback to default
986        global $ID;
987        $chromaCollection = $chromaDefaultCollection; // Default collection name
988
989        if (!empty($ID)) {
990            // Split the page ID by ':' and take the first part as collection name
991            $parts = explode(':', $ID);
992            if (isset($parts[0]) && !empty($parts[0])) {
993                // If the first part is 'playground', use the default collection
994                // Otherwise, use the first part as the collection name
995                if ($parts[0] === 'playground') {
996                    $chromaCollection = $chromaDefaultCollection;
997                } else {
998                    $chromaCollection = $parts[0];
999                }
1000            }
1001        }
1002
1003        // Create ChromaDB client with all required parameters
1004        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
1005            $chromaHost,
1006            $chromaPort,
1007            $chromaTenant,
1008            $chromaDatabase,
1009            $ollamaHost,
1010            $ollamaPort,
1011            $ollamaModel
1012        );
1013
1014
1015        return [$chromaClient, $chromaCollection];
1016    }
1017
1018    /**
1019     * Query ChromaDB for relevant documents
1020     *
1021     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1022     * Extracts modality from the current page ID to use as the collection name.
1023     *
1024     * @param string $text The text to find similar documents for
1025     * @param int $limit Maximum number of documents to retrieve (default: 5)
1026     * @param array|null $where Optional filter conditions for metadata
1027     * @return array List of document IDs
1028     */
1029    private function queryChromaDB($text, $limit = 5, $where = null)
1030    {
1031        try {
1032            // Get ChromaDB client and collection name
1033            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1034            // Query for similar documents
1035            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1036
1037            // Extract document IDs from results
1038            $documentIds = [];
1039            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
1040                foreach ($results['ids'][0] as $id) {
1041                    // Use the ChromaDB ID directly without conversion
1042                    $documentIds[] = $id;
1043                }
1044            }
1045
1046            return $documentIds;
1047        } catch (Exception $e) {
1048            // Log error but don't fail the operation
1049            error_log('ChromaDB query failed: ' . $e->getMessage());
1050            return [];
1051        }
1052    }
1053
1054    /**
1055     * Query ChromaDB for relevant documents and return text snippets
1056     *
1057     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1058     * Returns the actual text snippets instead of document IDs.
1059     *
1060     * @param string $text The text to find similar documents for
1061     * @param int $limit Maximum number of documents to retrieve (default: 10)
1062     * @param array|null $where Optional filter conditions for metadata
1063     * @return array List of text snippets
1064     */
1065    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
1066    {
1067        try {
1068            // Get ChromaDB client and collection name
1069            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1070            // Query for similar documents
1071            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1072
1073            // Extract document texts from results
1074            $snippets = [];
1075            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
1076                foreach ($results['documents'][0] as $document) {
1077                    $snippets[] = $document;
1078                }
1079            }
1080
1081            return $snippets;
1082        } catch (Exception $e) {
1083            // Log error but don't fail the operation
1084            error_log('ChromaDB query failed: ' . $e->getMessage());
1085            return [];
1086        }
1087    }
1088
1089    /**
1090     * Query ChromaDB for a template document
1091     *
1092     * Generates embeddings for the input text and queries ChromaDB for a template document
1093     * by filtering with metadata 'template=true'.
1094     *
1095     * @param string $text The text to find a template for
1096     * @return array List of template document IDs (maximum 1)
1097     */
1098    public function queryChromaDBTemplate($text)
1099    {
1100        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1101
1102        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1103        if (!empty($templateIds)) {
1104            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1105        }
1106
1107        return $templateIds;
1108    }
1109
1110}
1111