xref: /plugin/dokullm/LlmClient.php (revision 7a6e4f6a147fff035a571a3e0f58b775b09806f1)
1<?php
namespace dokuwiki\plugin\dokullm;

use Exception;
3
4/**
5 * LLM Client for the dokullm plugin
6 *
7 * This class provides methods to interact with an LLM API for various
8 * text processing tasks such as completion, rewriting, grammar correction,
9 * summarization, conclusion creation, text analysis, and custom prompts.
10 *
11 * The client handles:
12 * - API configuration and authentication
13 * - Prompt template loading and processing
14 * - Context-aware requests with metadata
15 * - DokuWiki page content retrieval
16 */
17
// Abort immediately when this file is loaded outside of a DokuWiki request
defined('DOKU_INC') || exit();
22
23/**
24 * LLM Client class for handling API communications
25 *
26 * Manages configuration settings and provides methods for various
27 * text processing operations through an LLM API.
28 * Implements caching for tool calls to avoid duplicate processing.
29 */
30class LlmClient
31{
32    /** @var string The API endpoint URL */
33    private $api_url;
34
35    /** @var array Cache for tool call results */
36    private $toolCallCache = [];
37
38    /** @var string Current text for tool usage */
39    private $currentText = '';
40
41    /** @var array Track tool call counts to prevent infinite loops */
42    private $toolCallCounts = [];
43
44    /** @var string The API authentication key */
45    private $api_key;
46
47    /** @var string The model identifier to use */
48    private $model;
49
50    /** @var int The request timeout in seconds */
51    private $timeout;
52
53    /** @var float The temperature setting for response randomness */
54    private $temperature;
55
56    /** @var float The top-p setting for nucleus sampling */
57    private $top_p;
58
59    /** @var int The top-k setting for token selection */
60    private $top_k;
61
62    /** @var float The min-p setting for minimum probability threshold */
63    private $min_p;
64
65    /** @var bool Whether to enable thinking in the LLM responses */
66    private $think;
67
68    /**
69     * Initialize the LLM client with configuration settings
70     *
71     * Retrieves configuration values from DokuWiki's configuration system
72     * for API URL, key, model, timeout, and LLM sampling parameters.
73     *
74     * Configuration values:
75     * - api_url: The LLM API endpoint URL
76     * - api_key: Authentication key for the API (optional)
77     * - model: The model identifier to use for requests
78     * - timeout: Request timeout in seconds
79     * - language: Language code for prompt templates
80     * - temperature: Temperature setting for response randomness (0.0-1.0)
81     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
82     * - top_k: Top-k setting (integer >= 1)
83     * - min_p: Minimum probability threshold (0.0-1.0)
84     * - think: Whether to enable thinking in LLM responses (boolean)
85     */
86    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $language = null)
87    {
88        $this->api_url = $api_url;
89        $this->api_key = $api_key;
90        $this->model = $model;
91        $this->timeout = $timeout;
92        $this->temperature = $temperature;
93        $this->top_p = $top_p;
94        $this->top_k = $top_k;
95        $this->min_p = $min_p;
96        $this->think = $think;
97        $this->language = $language;
98    }
99
100
101
102    public function process($action, $text, $metadata = [], $useContext = true)
103    {
104        // Store the current text for tool usage
105        $this->currentText = $text;
106
107        // Add text, think and action to metadata
108        $metadata['text'] = $text;
109        $metadata['think'] = $this->think ? '/think' : '/no_think';
110        $metadata['action'] = $action;
111
112        // If we have 'template' in metadata, move it to 'page_template'
113        if (isset($metadata['template'])) {
114            $metadata['page_template'] = $metadata['template'];
115            unset($metadata['template']);
116        }
117
118        // If we have 'examples' in metadata, move it to 'page_examples'
119        if (isset($metadata['examples'])) {
120            $metadata['page_examples'] = $metadata['examples'];
121            unset($metadata['examples']);
122        }
123
124        // If we have 'previous' in metadata, move it to 'page_previous'
125        if (isset($metadata['previous'])) {
126            $metadata['page_previous'] = $metadata['previous'];
127            unset($metadata['previous']);
128        }
129
130        $prompt = $this->loadPrompt($action, $metadata);
131
132        return $this->callAPI($action, $prompt, $metadata, $useContext);
133    }
134
135    /**
136     * Process text with a custom user prompt
137     *
138     * Sends a custom prompt to the LLM along with the provided text.
139     *
140     * @param string $text The text to process
141     * @param string $customPrompt The custom prompt to use
142     * @param array $metadata Optional metadata containing template and examples
143     * @param bool $useContext Whether to include template and examples in the context (default: true)
144     * @return string The processed text
145     */
146    public function processCustomPrompt($text, $metadata = [], $useContext = true)
147    {
148        // Store the current text for tool usage
149        $this->currentText = $text;
150
151        // Format the prompt with the text and custom prompt
152        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
153
154        return $this->callAPI('custom', $prompt, $metadata, $useContext);
155    }
156
157    /**
158     * Get the list of available tools for the LLM
159     *
160     * Defines the tools that can be used by the LLM during processing.
161     *
162     * @return array List of tool definitions
163     */
164    private function getAvailableTools()
165    {
166        return [
167            [
168                'type' => 'function',
169                'function' => [
170                    'name' => 'get_document',
171                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
172                    'parameters' => [
173                        'type' => 'object',
174                        'properties' => [
175                            'id' => [
176                                'type' => 'string',
177                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
178                            ]
179                        ],
180                        'required' => ['id']
181                    ]
182                ]
183            ],
184            [
185                'type' => 'function',
186                'function' => [
187                    'name' => 'get_template',
188                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
189                    'parameters' => [
190                        'type' => 'object',
191                        'properties' => [
192                            'language' => [
193                                'type' => 'string',
194                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
195                                'default' => 'ro'
196                            ]
197                        ]
198                    ]
199                ]
200            ],
201            [
202                'type' => 'function',
203                'function' => [
204                    'name' => 'get_examples',
205                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
206                    'parameters' => [
207                        'type' => 'object',
208                        'properties' => [
209                            'count' => [
210                                'type' => 'integer',
211                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
212                                'default' => 5
213                            ]
214                        ]
215                    ]
216                ]
217            ]
218        ];
219    }
220
221    /**
222     * Call the LLM API with the specified prompt
223     *
224     * Makes an HTTP POST request to the configured API endpoint with
225     * the prompt and other parameters. Handles authentication if an
226     * API key is configured.
227     *
228     * The method constructs a conversation with system and user messages,
229     * including context information from metadata when available.
230     *
231     * Complex logic includes:
232     * 1. Loading and enhancing the system prompt with metadata context
233     * 2. Building the API request with model parameters
234     * 3. Handling authentication with API key if configured
235     * 4. Making the HTTP request with proper error handling
236     * 5. Parsing and validating the API response
237     * 6. Supporting tool usage with automatic tool calling when enabled
238     * 7. Implementing context enhancement with templates, examples, and snippets
239     *
240     * The context information includes:
241     * - Template content: Used as a starting point for the response
242     * - Example pages: Full content of specified example pages
243     * - Text snippets: Relevant text examples from ChromaDB
244     *
245     * When tools are enabled, the method supports automatic tool calling:
246     * - Tools can retrieve documents, templates, and examples as needed
247     * - Tool responses are cached to avoid duplicate calls with identical parameters
248     * - Infinite loop protection prevents excessive tool calls
249     *
250     * @param string $command The command name for loading command-specific system prompts
251     * @param string $prompt The prompt to send to the LLM as user message
252     * @param array $metadata Optional metadata containing template, examples, and snippets
253     * @param bool $useContext Whether to include template and examples in the context (default: true)
254     * @return string The response content from the LLM
255     * @throws Exception If the API request fails or returns unexpected format
256     */
257
258    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
259    {
260        // Load system prompt which provides general instructions to the LLM
261        $systemPrompt = $this->loadSystemPrompt($command, []);
262
263        // Enhance the prompt with context information from metadata
264        // This provides the LLM with additional context about templates and examples
265        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
266            $contextInfo = "\n\n<context>\n";
267
268            // Add template content if specified in metadata
269            if (!empty($metadata['template'])) {
270                $templateContent = $this->getPageContent($metadata['template']);
271                if ($templateContent !== false) {
272                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
273                }
274            }
275
276            // Add example pages content if specified in metadata
277            if (!empty($metadata['examples'])) {
278                $examplesContent = [];
279                foreach ($metadata['examples'] as $example) {
280                    $content = $this->getPageContent($example);
281                    if ($content !== false) {
282                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
283                    }
284                }
285                if (!empty($examplesContent)) {
286                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
287                }
288            }
289
290            // Add text snippets if specified in metadata
291            if (!empty($metadata['snippets'])) {
292                $snippetsContent = [];
293                foreach ($metadata['snippets'] as $index => $snippet) {
294                    // These are text snippets from ChromaDB
295                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
296                }
297                if (!empty($snippetsContent)) {
298                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
299                }
300            }
301
302            $contextInfo .= "\n</context>\n";
303
304            // Append context information to system prompt
305            $prompt = $contextInfo . "\n\n" . $prompt;
306        }
307
308        // Prepare API request data with model parameters
309        $data = [
310            'model' => $this->model,
311            'messages' => [
312                ['role' => 'system', 'content' => $systemPrompt],
313                ['role' => 'user', 'content' => $prompt]
314            ],
315            'max_tokens' => 6144,
316            'stream' => false,
317            'keep_alive' => '30m',
318            'think' => true
319        ];
320
321        // Add tools to the request only if useTools is true
322        if ($useTools) {
323            // Define available tools
324            $data['tools'] = $this->getAvailableTools();
325            $data['tool_choice'] = 'auto';
326            $data['parallel_tool_calls'] = false;
327        }
328
329        // Only add parameters if they are defined and not null
330        if ($this->temperature !== null) {
331            $data['temperature'] = $this->temperature;
332        }
333        if ($this->top_p !== null) {
334            $data['top_p'] = $this->top_p;
335        }
336        if ($this->top_k !== null) {
337            $data['top_k'] = $this->top_k;
338        }
339        if ($this->min_p !== null) {
340            $data['min_p'] = $this->min_p;
341        }
342
343        // Make an API call with tool responses
344        return $this->callAPIWithTools($data, false);
345    }
346
347    /**
348     * Handle tool calls from the LLM
349     *
350     * Processes tool calls made by the LLM and returns appropriate responses.
351     * Implements caching to avoid duplicate calls with identical parameters.
352     *
353     * @param array $toolCall The tool call data from the LLM
354     * @return array The tool response message
355     */
356    private function handleToolCall($toolCall)
357    {
358        $toolName = $toolCall['function']['name'];
359        $arguments = json_decode($toolCall['function']['arguments'], true);
360
361        // Create a cache key from the tool name and arguments
362        $cacheKey = md5($toolName . serialize($arguments));
363
364        // Check if we have a cached result for this tool call
365        if (isset($this->toolCallCache[$cacheKey])) {
366            // Return cached result and indicate it was found in cache
367            $toolResponse = $this->toolCallCache[$cacheKey];
368            // Update with current tool call ID
369            $toolResponse['tool_call_id'] = $toolCall['id'];
370            $toolResponse['cached'] = true; // Indicate this response was cached
371            return $toolResponse;
372        }
373
374        $toolResponse = [
375            'role' => 'tool',
376            'tool_call_id' => $toolCall['id'],
377            'cached' => false // Indicate this is a fresh response
378        ];
379
380        switch ($toolName) {
381            case 'get_document':
382                $documentId = $arguments['id'];
383                $content = $this->getPageContent($documentId);
384                if ($content === false) {
385                    $toolResponse['content'] = 'Document not found: ' . $documentId;
386                } else {
387                    $toolResponse['content'] = $content;
388                }
389                break;
390
391            case 'get_template':
392                // Get template content using the convenience function
393                $toolResponse['content'] = $this->getTemplateContent();
394                break;
395
396            case 'get_examples':
397                // Get examples content using the convenience function
398                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
399                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
400                break;
401
402            default:
403                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
404        }
405
406        // Cache the result for future calls with the same parameters
407        $cacheEntry = $toolResponse;
408        // Remove tool_call_id and cached flag from cache as they change per call
409        unset($cacheEntry['tool_call_id']);
410        unset($cacheEntry['cached']);
411        $this->toolCallCache[$cacheKey] = $cacheEntry;
412
413        return $toolResponse;
414    }
415
416    /**
417     * Make an API call with tool responses
418     *
419     * Sends a follow-up request to the LLM with tool responses.
420     * Implements complex logic for handling tool calls with caching and loop protection.
421     *
422     * Complex logic includes:
423     * 1. Making HTTP requests with proper authentication and error handling
424     * 2. Processing tool calls from the LLM response
425     * 3. Caching tool responses to avoid duplicate calls with identical parameters
426     * 4. Tracking tool call counts to prevent infinite loops
427     * 5. Implementing loop protection with call count limits
428     * 6. Handling recursive tool calls until final content is generated
429     *
430     * Loop protection works by:
431     * - Tracking individual tool call counts (max 3 per tool)
432     * - Tracking total tool calls (max 10 total)
433     * - Disabling tools when limits are exceeded to break potential loops
434     *
435     * @param array $data The API request data including messages with tool responses
436     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
437     * @param bool $useTools Whether to process tool calls (used for loop protection)
438     * @return string The final response content
439     */
440    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
441    {
442        // Set up HTTP headers, including authentication if API key is configured
443        $headers = [
444            'Content-Type: application/json'
445        ];
446
447        if (!empty($this->api_key)) {
448            $headers[] = 'Authorization: Bearer ' . $this->api_key;
449        }
450
451       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
452        if ($toolsCalled) {
453            unset($data['tools']);
454            unset($data['tool_choice']);
455        }
456
457        // Initialize and configure cURL for the API request
458        $ch = curl_init();
459        curl_setopt($ch, CURLOPT_URL, $this->api_url);
460        curl_setopt($ch, CURLOPT_POST, true);
461        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
462        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
463        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
464        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
465        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
466
467        // Execute the API request
468        $response = curl_exec($ch);
469        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
470        $error = curl_error($ch);
471        curl_close($ch);
472
473        // Handle cURL errors
474        if ($error) {
475            throw new Exception('API request failed: ' . $error);
476        }
477
478        // Handle HTTP errors
479        if ($httpCode !== 200) {
480            throw new Exception('API request failed with HTTP code: ' . $httpCode);
481        }
482
483        // Parse and validate the JSON response
484        $result = json_decode($response, true);
485
486        // Extract the content from the response if available
487        if (isset($result['choices'][0]['message']['content'])) {
488            $content = trim($result['choices'][0]['message']['content']);
489            // Reset tool call counts when we get final content
490            $this->toolCallCounts = [];
491            return $content;
492        }
493
494        // Handle tool calls if present
495        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
496            $toolCalls = $result['choices'][0]['message']['tool_calls'];
497            // Start with original messages
498            $messages = $data['messages'];
499            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
500            $assistantMessage = [];
501            foreach ($result['choices'][0]['message'] as $key => $value) {
502                if ($key !== 'content') {
503                    $assistantMessage[$key] = $value;
504                }
505            }
506            // Add assistant's message with tool calls
507            $messages[] = $assistantMessage;
508
509            // Process each tool call and track counts to prevent infinite loops
510            foreach ($toolCalls as $toolCall) {
511                $toolName = $toolCall['function']['name'];
512                // Increment tool call count
513                if (!isset($this->toolCallCounts[$toolName])) {
514                    $this->toolCallCounts[$toolName] = 0;
515                }
516                $this->toolCallCounts[$toolName]++;
517
518                $toolResponse = $this->handleToolCall($toolCall);
519                $messages[] = $toolResponse;
520            }
521
522            // Check if any tool has been called more than 3 times
523            $toolsCalledCount = 0;
524            foreach ($this->toolCallCounts as $count) {
525                if ($count > 3) {
526                    // If any tool called more than 3 times, disable tools to break loop
527                    $toolsCalled = true;
528                    break;
529                }
530                $toolsCalledCount += $count;
531            }
532
533            // If total tool calls exceed 10, also disable tools
534            if ($toolsCalledCount > 10) {
535                $toolsCalled = true;
536            }
537
538            // Make another API call with tool responses
539            $data['messages'] = $messages;
540            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
541        }
542
543        // Throw exception for unexpected response format
544        throw new Exception('Unexpected API response format');
545    }
546
547    /**
548     * Load a prompt template from a DokuWiki page and replace placeholders
549     *
550     * Loads prompt templates from DokuWiki pages with IDs in the format
551     * dokullm:prompts:LANGUAGE:PROMPT_NAME
552     *
553     * The method implements a language fallback mechanism:
554     * 1. First tries to load the prompt in the configured language
555     * 2. If not found, falls back to English prompts
556     * 3. Throws an exception if neither is available
557     *
558     * After loading the prompt, it scans for placeholders and automatically
559     * adds missing ones with appropriate values before replacing all placeholders.
560     *
561     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
562     * @param array $variables Associative array of placeholder => value pairs
563     * @return string The processed prompt with placeholders replaced
564     * @throws Exception If the prompt page cannot be loaded in any language
565     */
566    private function loadPrompt($promptName, $variables = [])
567    {
568        // Default to 'en' if language is 'default' or not set
569        if ($this->language === 'default' || empty($this->language)) {
570            $this->language = 'en';
571        }
572
573        // Construct the page ID for the prompt in the configured language
574        $promptPageId = 'dokullm:prompts:' . $this->language . ':' . $promptName;
575
576        // Try to get the content of the prompt page in the configured language
577        $prompt = $this->getPageContent($promptPageId);
578
579        // If the language-specific prompt doesn't exist, try English as fallback
580        if ($prompt === false && $this->language !== 'en') {
581            $promptPageId = 'dokullm:prompts:en:' . $promptName;
582            $prompt = $this->getPageContent($promptPageId);
583        }
584
585        // If still no prompt found, throw an exception
586        if ($prompt === false) {
587            throw new Exception('Prompt page not found: ' . $promptPageId);
588        }
589
590        // Find placeholders in the prompt
591        $placeholders = $this->findPlaceholders($prompt);
592
593        // Add missing placeholders with appropriate values
594        foreach ($placeholders as $placeholder) {
595            // Skip if already provided in variables
596            if (isset($variables[$placeholder])) {
597                continue;
598            }
599
600            // Add appropriate values for specific placeholders
601            switch ($placeholder) {
602                case 'template':
603                    // If we have a page_template in variables, use it
604                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
605                    break;
606
607                case 'snippets':
608                    $variables[$placeholder] = $this->getSnippets(10);
609                    break;
610
611                case 'examples':
612                    // If we have example page IDs in metadata, add examples content
613                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
614                    break;
615
616                case 'previous':
617                    // If we have a previous report page ID in metadata, add previous content
618                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
619
620                    // Add current and previous dates to metadata
621                    $variables['current_date'] = $this->getPageDate();
622                    $variables['previous_date'] = !empty($variables['page_previous']) ?
623                                                $this->getPageDate($variables['page_previous']) :
624                                                '';
625                    break;
626
627                default:
628                    // For other placeholders, leave them empty or set a default value
629                    $variables[$placeholder] = '';
630                    break;
631            }
632        }
633
634        // Replace placeholders with actual values
635        // Placeholders are in the format {placeholder_name}
636        foreach ($variables as $placeholder => $value) {
637            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
638        }
639
640        // Return the processed prompt
641        return $prompt;
642    }
643
644    /**
645     * Load system prompt with optional command-specific appendage
646     *
647     * Loads the main system prompt and appends any command-specific system prompt
648     * if available.
649     *
650     * @param string $action The action/command name
651     * @param array $variables Associative array of placeholder => value pairs
652     * @return string The combined system prompt
653     */
654    private function loadSystemPrompt($action, $variables = [])
655    {
656        // Load system prompt which provides general instructions to the LLM
657        $systemPrompt = $this->loadPrompt('system', $variables);
658
659        // Check if there's a command-specific system prompt appendage
660        if (!empty($action)) {
661            try {
662                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
663                if ($commandSystemPrompt !== false) {
664                    $systemPrompt .= "\n" . $commandSystemPrompt;
665                }
666            } catch (Exception $e) {
667                // Ignore exceptions when loading command-specific system prompt
668                // This allows the main system prompt to still be used
669            }
670        }
671
672        return $systemPrompt;
673    }
674
675    /**
676     * Get the content of a DokuWiki page
677     *
678     * Retrieves the raw content of a DokuWiki page by its ID.
679     * Used for loading template and example page content for context.
680     *
681     * @param string $pageId The page ID to retrieve
682     * @return string|false The page content or false if not found/readable
683     */
684    public function getPageContent($pageId)
685    {
686        // Convert page ID to file path
687        $pageFile = wikiFN($pageId);
688
689        // Check if file exists and is readable
690        if (file_exists($pageFile) && is_readable($pageFile)) {
691            return file_get_contents($pageFile);
692        }
693
694        return false;
695    }
696
697    /**
698     * Extract date from page ID or file timestamp
699     *
700     * Attempts to extract a date in YYmmdd format from the page ID.
701     * If not found, uses the file's last modification timestamp.
702     *
703     * @param string $pageId Optional page ID to extract date from (defaults to current page)
704     * @return string Formatted date string (YYYY-MM-DD)
705     */
706    private function getPageDate($pageId = null)
707    {
708        global $ID;
709
710        // Use provided page ID or current page ID
711        $targetPageId = $pageId ?: $ID;
712
713        // Try to extract date from page ID (looking for YYmmdd pattern)
714        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
715            // Convert YYmmdd to YYYY-MM-DD
716            $year = $matches[1];
717            $month = $matches[2];
718            $day = $matches[3];
719
720            // Assume 20xx for years 00-69, 19xx for years 70-99
721            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
722
723            return $fullYear . '-' . $month . '-' . $day;
724        }
725
726        // Fallback to file timestamp
727        $pageFile = wikiFN($targetPageId);
728        if (file_exists($pageFile)) {
729            $timestamp = filemtime($pageFile);
730            return date('Y-m-d', $timestamp);
731        }
732
733        // Return empty string if no date can be determined
734        return '';
735    }
736
737    /**
738     * Get current text
739     *
740     * Retrieves the current text stored from the process function.
741     *
742     * @return string The current text
743     */
744    private function getCurrentText()
745    {
746        return $this->currentText;
747    }
748
749    /**
750     * Scan text for placeholders
751     *
752     * Finds all placeholders in the format {placeholder_name} in the provided text
753     * and returns an array of unique placeholder names.
754     *
755     * @param string $text The text to scan for placeholders
756     * @return array List of unique placeholder names found in the text
757     */
758    public function findPlaceholders($text)
759    {
760        $placeholders = [];
761        $pattern = '/\{([^}]+)\}/';
762
763        if (preg_match_all($pattern, $text, $matches)) {
764            // Get unique placeholder names
765            $placeholders = array_unique($matches[1]);
766        }
767
768        return $placeholders;
769    }
770
771    /**
772     * Get template content for the current text
773     *
774     * Convenience function to retrieve template content. If a pageId is provided,
775     * retrieves content directly from that page. Otherwise, queries ChromaDB for
776     * a relevant template based on the current text.
777     *
778     * @param string|null $pageId Optional page ID to retrieve template from directly
779     * @return string The template content or empty string if not found
780     */
781    private function getTemplateContent($pageId = null)
782    {
783        // If pageId is provided, use it directly
784        if ($pageId !== null) {
785            $templateContent = $this->getPageContent($pageId);
786            if ($templateContent !== false) {
787                return $templateContent;
788            }
789        }
790
791        // Otherwise, get template suggestion for the current text
792        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
793        if (!empty($pageId)) {
794            $templateContent = $this->getPageContent($pageId[0]);
795            if ($templateContent !== false) {
796                return $templateContent;
797            }
798        }
799        return '( no template )';
800    }
801
802    /**
803     * Get snippets content for the current text
804     *
805     * Convenience function to retrieve relevant snippets for the current text.
806     * Queries ChromaDB for relevant snippets and returns them formatted.
807     *
808     * @param int $count Number of snippets to retrieve (default: 10)
809     * @return string Formatted snippets content or empty string if not found
810     */
811    private function getSnippets($count = 10)
812    {
813        // Get example snippets for the current text
814        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
815        if (!empty($snippets)) {
816            $formattedSnippets = [];
817            foreach ($snippets as $index => $snippet) {
818                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
819            }
820            return implode("\n", $formattedSnippets);
821        }
822        return '( no examples )';
823    }
824
825    /**
826     * Get examples content from example page IDs
827     *
828     * Convenience function to retrieve content from example pages.
829     * Returns the content of each page packed in XML elements.
830     *
831     * @param array $exampleIds List of example page IDs
832     * @return string Formatted examples content or empty string if not found
833     */
834    private function getExamplesContent($exampleIds = [])
835    {
836        if (empty($exampleIds) || !is_array($exampleIds)) {
837            return '( no examples )';
838        }
839
840        $examplesContent = [];
841        foreach ($exampleIds as $index => $exampleId) {
842            $content = $this->getPageContent($exampleId);
843            if ($content !== false) {
844                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
845            }
846        }
847
848        return implode("\n", $examplesContent);
849    }
850
851    /**
852     * Get previous report content from previous page ID
853     *
854     * Convenience function to retrieve content from a previous report page.
855     * Returns the content of the previous page or a default message if not found.
856     *
857     * @param string $previousId Previous page ID
858     * @return string Previous report content or default message if not found
859     */
860    private function getPreviousContent($previousId = '')
861    {
862        if (empty($previousId)) {
863            return '( no previous report )';
864        }
865
866        $content = $this->getPageContent($previousId);
867        if ($content !== false) {
868            return $content;
869        }
870
871        return '( previous report not found )';
872    }
873
874    /**
875     * Get ChromaDB client with configuration
876     *
877     * Creates and returns a ChromaDB client with the appropriate configuration.
878     * Extracts modality from the current page ID to use as the collection name.
879     *
880     * @return array Array containing the ChromaDB client and collection name
881     */
882    private function getChromaDBClient()
883    {
884        // Get ChromaDB configuration from DokuWiki plugin configuration
885        $chromaHost = $this->getConf('chroma_host');
886        $chromaPort = $this->getConf('chroma_port');
887        $chromaTenant = $this->getConf('chroma_tenant');
888        $chromaDatabase = $this->getConf('chroma_database');
889        $chromaDefaultCollection = $this->getConf('chroma_collection');
890        $ollamaHost = $this->getConf('ollama_host');
891        $ollamaPort = $this->getConf('ollama_port');
892        $ollamaModel = $this->getConf('ollama_embeddings_model');
893
894        // Use the first part of the current page ID as collection name, fallback to default
895        global $ID;
896        $chromaCollection = $chromaDefaultCollection; // Default collection name
897
898        if (!empty($ID)) {
899            // Split the page ID by ':' and take the first part as collection name
900            $parts = explode(':', $ID);
901            if (isset($parts[0]) && !empty($parts[0])) {
902                // If the first part is 'playground', use the default collection
903                // Otherwise, use the first part as the collection name
904                if ($parts[0] === 'playground') {
905                    $chromaCollection = $chromaDefaultCollection;
906                } else {
907                    $chromaCollection = $parts[0];
908                }
909            }
910        }
911
912        // Create ChromaDB client with all required parameters
913        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
914            $chromaHost,
915            $chromaPort,
916            $chromaTenant,
917            $chromaDatabase,
918            $ollamaHost,
919            $ollamaPort,
920            $ollamaModel
921        );
922
923
924        return [$chromaClient, $chromaCollection];
925    }
926
927    /**
928     * Query ChromaDB for relevant documents
929     *
930     * Generates embeddings for the input text and queries ChromaDB for similar documents.
931     * Extracts modality from the current page ID to use as the collection name.
932     *
933     * @param string $text The text to find similar documents for
934     * @param int $limit Maximum number of documents to retrieve (default: 5)
935     * @param array|null $where Optional filter conditions for metadata
936     * @return array List of document IDs
937     */
938    private function queryChromaDB($text, $limit = 5, $where = null)
939    {
940        try {
941            // Get ChromaDB client and collection name
942            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
943            // Query for similar documents
944            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
945
946            // Extract document IDs from results
947            $documentIds = [];
948            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
949                foreach ($results['ids'][0] as $id) {
950                    // Use the ChromaDB ID directly without conversion
951                    $documentIds[] = $id;
952                }
953            }
954
955            return $documentIds;
956        } catch (Exception $e) {
957            // Log error but don't fail the operation
958            error_log('ChromaDB query failed: ' . $e->getMessage());
959            return [];
960        }
961    }
962
963    /**
964     * Query ChromaDB for relevant documents and return text snippets
965     *
966     * Generates embeddings for the input text and queries ChromaDB for similar documents.
967     * Returns the actual text snippets instead of document IDs.
968     *
969     * @param string $text The text to find similar documents for
970     * @param int $limit Maximum number of documents to retrieve (default: 10)
971     * @param array|null $where Optional filter conditions for metadata
972     * @return array List of text snippets
973     */
974    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
975    {
976        try {
977            // Get ChromaDB client and collection name
978            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
979            // Query for similar documents
980            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
981
982            // Extract document texts from results
983            $snippets = [];
984            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
985                foreach ($results['documents'][0] as $document) {
986                    $snippets[] = $document;
987                }
988            }
989
990            return $snippets;
991        } catch (Exception $e) {
992            // Log error but don't fail the operation
993            error_log('ChromaDB query failed: ' . $e->getMessage());
994            return [];
995        }
996    }
997
998    /**
999     * Query ChromaDB for a template document
1000     *
1001     * Generates embeddings for the input text and queries ChromaDB for a template document
1002     * by filtering with metadata 'template=true'.
1003     *
1004     * @param string $text The text to find a template for
1005     * @return array List of template document IDs (maximum 1)
1006     */
1007    public function queryChromaDBTemplate($text)
1008    {
1009        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1010
1011        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1012        if (!empty($templateIds)) {
1013            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1014        }
1015
1016        return $templateIds;
1017    }
1018
1019}
1020