xref: /plugin/dokullm/LlmClient.php (revision a8c7401153bb2c238fb51cea882d9c47202f2535)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4/**
5 * LLM Client for the dokullm plugin
6 *
7 * This class provides methods to interact with an LLM API for various
8 * text processing tasks such as completion, rewriting, grammar correction,
9 * summarization, conclusion creation, text analysis, and custom prompts.
10 *
11 * The client handles:
12 * - API configuration and authentication
13 * - Prompt template loading and processing
14 * - Context-aware requests with metadata
15 * - DokuWiki page content retrieval
16 */
17
// This file is only meant to be loaded from inside DokuWiki; abort otherwise.
defined('DOKU_INC') || exit();
22
23/**
24 * LLM Client class for handling API communications
25 *
26 * Manages configuration settings and provides methods for various
27 * text processing operations through an LLM API.
28 * Implements caching for tool calls to avoid duplicate processing.
29 */
30class LlmClient
31{
32    /** @var string The API endpoint URL */
33    private $api_url;
34
35    /** @var array Cache for tool call results */
36    private $toolCallCache = [];
37
38    /** @var string Current text for tool usage */
39    private $currentText = '';
40
41    /** @var array Track tool call counts to prevent infinite loops */
42    private $toolCallCounts = [];
43
44    /** @var string The API authentication key */
45    private $api_key;
46
47    /** @var string The model identifier to use */
48    private $model;
49
50    /** @var int The request timeout in seconds */
51    private $timeout;
52
53    /** @var float The temperature setting for response randomness */
54    private $temperature;
55
56    /** @var float The top-p setting for nucleus sampling */
57    private $top_p;
58
59    /** @var int The top-k setting for token selection */
60    private $top_k;
61
62    /** @var float The min-p setting for minimum probability threshold */
63    private $min_p;
64
65    /** @var bool Whether to enable thinking in LLM responses */
66    private $think;
67
68    /** @var object|null ChromaDB client instance */
69    private $chromaClient;
70
71    /** @var string|null Page ID */
72    private $pageId;
73
74    /**
75     * Initialize the LLM client with configuration settings
76     *
77     * Retrieves configuration values from DokuWiki's configuration system
78     * for API URL, key, model, timeout, and LLM sampling parameters.
79     *
80     * Configuration values:
81     * - api_url: The LLM API endpoint URL
82     * - api_key: Authentication key for the API (optional)
83     * - model: The model identifier to use for requests
84     * - timeout: Request timeout in seconds
85     * - profile: Profile for prompt templates
86     * - temperature: Temperature setting for response randomness (0.0-1.0)
87     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
88     * - top_k: Top-k setting (integer >= 1)
89     * - min_p: Minimum probability threshold (0.0-1.0)
90     * - think: Whether to enable thinking in LLM responses (boolean)
91     * - chromaClient: ChromaDB client instance (optional)
92     * - pageId: Page ID (optional)
93     */
94    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $profile = null, $chromaClient = null, $pageId = null, $enableChromaDB = null)
95    {
96        $this->api_url = $api_url;
97        $this->api_key = $api_key;
98        $this->model = $model;
99        $this->timeout = $timeout;
100        $this->temperature = $temperature;
101        $this->top_p = $top_p;
102        $this->top_k = $top_k;
103        $this->min_p = $min_p;
104        $this->think = $think;
105        $this->profile = $profile;
106        $this->chromaClient = $chromaClient;
107        $this->pageId = $pageId;
108        $this->enableChromaDB = $enableChromaDB ?? false;
109    }
110
111
112
113    public function process($action, $text, $metadata = [], $useContext = true)
114    {
115        // Store the current text for tool usage
116        $this->currentText = $text;
117
118        // Add text, think and action to metadata
119        $metadata['text'] = $text;
120        $metadata['think'] = $this->think ? '/think' : '/no_think';
121        $metadata['action'] = $action;
122
123        // If we have 'template' in metadata, move it to 'page_template'
124        if (isset($metadata['template'])) {
125            $metadata['page_template'] = $metadata['template'];
126            unset($metadata['template']);
127        }
128
129        // If we have 'examples' in metadata, move it to 'page_examples'
130        if (isset($metadata['examples'])) {
131            $metadata['page_examples'] = $metadata['examples'];
132            unset($metadata['examples']);
133        }
134
135        // If we have 'previous' in metadata, move it to 'page_previous'
136        if (isset($metadata['previous'])) {
137            $metadata['page_previous'] = $metadata['previous'];
138            unset($metadata['previous']);
139        }
140
141        $prompt = $this->loadPrompt($action, $metadata);
142
143        return $this->callAPI($action, $prompt, $metadata, $useContext);
144    }
145
146    /**
147     * Process text with a custom user prompt
148     *
149     * Sends a custom prompt to the LLM along with the provided text.
150     *
151     * @param string $text The text to process
152     * @param string $customPrompt The custom prompt to use
153     * @param array $metadata Optional metadata containing template and examples
154     * @param bool $useContext Whether to include template and examples in the context (default: true)
155     * @return string The processed text
156     */
157    public function processCustomPrompt($text, $metadata = [], $useContext = true)
158    {
159        // Store the current text for tool usage
160        $this->currentText = $text;
161
162        // Format the prompt with the text and custom prompt
163        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
164
165        return $this->callAPI('custom', $prompt, $metadata, $useContext);
166    }
167
168    /**
169     * Get the list of available tools for the LLM
170     *
171     * Defines the tools that can be used by the LLM during processing.
172     *
173     * @return array List of tool definitions
174     */
175    private function getAvailableTools()
176    {
177        return [
178            [
179                'type' => 'function',
180                'function' => [
181                    'name' => 'get_document',
182                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
183                    'parameters' => [
184                        'type' => 'object',
185                        'properties' => [
186                            'id' => [
187                                'type' => 'string',
188                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
189                            ]
190                        ],
191                        'required' => ['id']
192                    ]
193                ]
194            ],
195            [
196                'type' => 'function',
197                'function' => [
198                    'name' => 'get_template',
199                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
200                    'parameters' => [
201                        'type' => 'object',
202                        'properties' => [
203                            'type' => [
204                                'type' => 'string',
205                                'description' => 'The type of the template (e.g., "mri" for MRI reports, "daily" for daily reports).',
206                                'default' => ''
207                            ]
208                        ]
209                    ]
210                ]
211            ],
212            [
213                'type' => 'function',
214                'function' => [
215                    'name' => 'get_examples',
216                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
217                    'parameters' => [
218                        'type' => 'object',
219                        'properties' => [
220                            'count' => [
221                                'type' => 'integer',
222                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
223                                'default' => 5
224                            ]
225                        ]
226                    ]
227                ]
228            ]
229        ];
230    }
231
232    /**
233     * Call the LLM API with the specified prompt
234     *
235     * Makes an HTTP POST request to the configured API endpoint with
236     * the prompt and other parameters. Handles authentication if an
237     * API key is configured.
238     *
239     * The method constructs a conversation with system and user messages,
240     * including context information from metadata when available.
241     *
242     * Complex logic includes:
243     * 1. Loading and enhancing the system prompt with metadata context
244     * 2. Building the API request with model parameters
245     * 3. Handling authentication with API key if configured
246     * 4. Making the HTTP request with proper error handling
247     * 5. Parsing and validating the API response
248     * 6. Supporting tool usage with automatic tool calling when enabled
249     * 7. Implementing context enhancement with templates, examples, and snippets
250     *
251     * The context information includes:
252     * - Template content: Used as a starting point for the response
253     * - Example pages: Full content of specified example pages
254     * - Text snippets: Relevant text examples from ChromaDB
255     *
256     * When tools are enabled, the method supports automatic tool calling:
257     * - Tools can retrieve documents, templates, and examples as needed
258     * - Tool responses are cached to avoid duplicate calls with identical parameters
259     * - Infinite loop protection prevents excessive tool calls
260     *
261     * @param string $command The command name for loading command-specific system prompts
262     * @param string $prompt The prompt to send to the LLM as user message
263     * @param array $metadata Optional metadata containing template, examples, and snippets
264     * @param bool $useContext Whether to include template and examples in the context (default: true)
265     * @return string The response content from the LLM
266     * @throws Exception If the API request fails or returns unexpected format
267     */
268
269    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
270    {
271        // Load system prompt which provides general instructions to the LLM
272        $systemPrompt = $this->loadSystemPrompt($command, []);
273
274        // Enhance the prompt with context information from metadata
275        // This provides the LLM with additional context about templates and examples
276        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
277            $contextInfo = "\n\n<context>\n";
278
279            // Add template content if specified in metadata
280            if (!empty($metadata['template'])) {
281                $templateContent = $this->getPageContent($metadata['template']);
282                if ($templateContent !== false) {
283                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
284                }
285            }
286
287            // Add example pages content if specified in metadata
288            if (!empty($metadata['examples'])) {
289                $examplesContent = [];
290                foreach ($metadata['examples'] as $example) {
291                    $content = $this->getPageContent($example);
292                    if ($content !== false) {
293                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
294                    }
295                }
296                if (!empty($examplesContent)) {
297                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
298                }
299            }
300
301            // Add text snippets if specified in metadata
302            if (!empty($metadata['snippets'])) {
303                $snippetsContent = [];
304                foreach ($metadata['snippets'] as $index => $snippet) {
305                    // These are text snippets from ChromaDB
306                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
307                }
308                if (!empty($snippetsContent)) {
309                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
310                }
311            }
312
313            $contextInfo .= "\n</context>\n";
314
315            // Append context information to system prompt
316            $prompt = $contextInfo . "\n\n" . $prompt;
317        }
318
319        // Prepare API request data with model parameters
320        $data = [
321            'model' => $this->model,
322            'messages' => [
323                ['role' => 'system', 'content' => $systemPrompt],
324                ['role' => 'user', 'content' => $prompt]
325            ],
326            'max_tokens' => 6144,
327            'stream' => false,
328            'keep_alive' => '30m',
329            'think' => true
330        ];
331
332        // Add tools to the request only if useTools is true
333        if ($useTools) {
334            // Define available tools
335            $data['tools'] = $this->getAvailableTools();
336            $data['tool_choice'] = 'auto';
337            $data['parallel_tool_calls'] = false;
338        }
339
340        // Only add parameters if they are defined and not null
341        if ($this->temperature !== null) {
342            $data['temperature'] = $this->temperature;
343        }
344        if ($this->top_p !== null) {
345            $data['top_p'] = $this->top_p;
346        }
347        if ($this->top_k !== null) {
348            $data['top_k'] = $this->top_k;
349        }
350        if ($this->min_p !== null) {
351            $data['min_p'] = $this->min_p;
352        }
353
354        // Make an API call with tool responses
355        return $this->callAPIWithTools($data, false);
356    }
357
358    /**
359     * Handle tool calls from the LLM
360     *
361     * Processes tool calls made by the LLM and returns appropriate responses.
362     * Implements caching to avoid duplicate calls with identical parameters.
363     *
364     * @param array $toolCall The tool call data from the LLM
365     * @return array The tool response message
366     */
367    private function handleToolCall($toolCall)
368    {
369        $toolName = $toolCall['function']['name'];
370        $arguments = json_decode($toolCall['function']['arguments'], true);
371
372        // Create a cache key from the tool name and arguments
373        $cacheKey = md5($toolName . serialize($arguments));
374
375        // Check if we have a cached result for this tool call
376        if (isset($this->toolCallCache[$cacheKey])) {
377            // Return cached result and indicate it was found in cache
378            $toolResponse = $this->toolCallCache[$cacheKey];
379            // Update with current tool call ID
380            $toolResponse['tool_call_id'] = $toolCall['id'];
381            $toolResponse['cached'] = true; // Indicate this response was cached
382            return $toolResponse;
383        }
384
385        $toolResponse = [
386            'role' => 'tool',
387            'tool_call_id' => $toolCall['id'],
388            'cached' => false // Indicate this is a fresh response
389        ];
390
391        switch ($toolName) {
392            case 'get_document':
393                $documentId = $arguments['id'];
394                $content = $this->getPageContent($documentId);
395                if ($content === false) {
396                    $toolResponse['content'] = 'Document not found: ' . $documentId;
397                } else {
398                    $toolResponse['content'] = $content;
399                }
400                break;
401
402            case 'get_template':
403                // Get template content using the convenience function
404                $toolResponse['content'] = $this->getTemplateContent();
405                break;
406
407            case 'get_examples':
408                // Get examples content using the convenience function
409                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
410                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
411                break;
412
413            default:
414                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
415        }
416
417        // Cache the result for future calls with the same parameters
418        $cacheEntry = $toolResponse;
419        // Remove tool_call_id and cached flag from cache as they change per call
420        unset($cacheEntry['tool_call_id']);
421        unset($cacheEntry['cached']);
422        $this->toolCallCache[$cacheKey] = $cacheEntry;
423
424        return $toolResponse;
425    }
426
427    /**
428     * Make an API call with tool responses
429     *
430     * Sends a follow-up request to the LLM with tool responses.
431     * Implements complex logic for handling tool calls with caching and loop protection.
432     *
433     * Complex logic includes:
434     * 1. Making HTTP requests with proper authentication and error handling
435     * 2. Processing tool calls from the LLM response
436     * 3. Caching tool responses to avoid duplicate calls with identical parameters
437     * 4. Tracking tool call counts to prevent infinite loops
438     * 5. Implementing loop protection with call count limits
439     * 6. Handling recursive tool calls until final content is generated
440     *
441     * Loop protection works by:
442     * - Tracking individual tool call counts (max 3 per tool)
443     * - Tracking total tool calls (max 10 total)
444     * - Disabling tools when limits are exceeded to break potential loops
445     *
446     * @param array $data The API request data including messages with tool responses
447     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
448     * @param bool $useTools Whether to process tool calls (used for loop protection)
449     * @return string The final response content
450     */
451    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
452    {
453        // Set up HTTP headers, including authentication if API key is configured
454        $headers = [
455            'Content-Type: application/json'
456        ];
457
458        if (!empty($this->api_key)) {
459            $headers[] = 'Authorization: Bearer ' . $this->api_key;
460        }
461
462       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
463        if ($toolsCalled) {
464            unset($data['tools']);
465            unset($data['tool_choice']);
466        }
467
468        // Initialize and configure cURL for the API request
469        $ch = curl_init();
470        curl_setopt($ch, CURLOPT_URL, $this->api_url);
471        curl_setopt($ch, CURLOPT_POST, true);
472        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
473        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
474        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
475        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
476        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
477
478        // Execute the API request
479        $response = curl_exec($ch);
480        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
481        $error = curl_error($ch);
482        curl_close($ch);
483
484        // Handle cURL errors
485        if ($error) {
486            throw new Exception('API request failed: ' . $error);
487        }
488
489        // Handle HTTP errors
490        if ($httpCode !== 200) {
491            throw new Exception('API request failed with HTTP code: ' . $httpCode);
492        }
493
494        // Parse and validate the JSON response
495        $result = json_decode($response, true);
496
497        // Extract the content from the response if available
498        if (isset($result['choices'][0]['message']['content'])) {
499            $content = trim($result['choices'][0]['message']['content']);
500            // Reset tool call counts when we get final content
501            $this->toolCallCounts = [];
502            return $content;
503        }
504
505        // Handle tool calls if present
506        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
507            $toolCalls = $result['choices'][0]['message']['tool_calls'];
508            // Start with original messages
509            $messages = $data['messages'];
510            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
511            $assistantMessage = [];
512            foreach ($result['choices'][0]['message'] as $key => $value) {
513                if ($key !== 'content') {
514                    $assistantMessage[$key] = $value;
515                }
516            }
517            // Add assistant's message with tool calls
518            $messages[] = $assistantMessage;
519
520            // Process each tool call and track counts to prevent infinite loops
521            foreach ($toolCalls as $toolCall) {
522                $toolName = $toolCall['function']['name'];
523                // Increment tool call count
524                if (!isset($this->toolCallCounts[$toolName])) {
525                    $this->toolCallCounts[$toolName] = 0;
526                }
527                $this->toolCallCounts[$toolName]++;
528
529                $toolResponse = $this->handleToolCall($toolCall);
530                $messages[] = $toolResponse;
531            }
532
533            // Check if any tool has been called more than 3 times
534            $toolsCalledCount = 0;
535            foreach ($this->toolCallCounts as $count) {
536                if ($count > 3) {
537                    // If any tool called more than 3 times, disable tools to break loop
538                    $toolsCalled = true;
539                    break;
540                }
541                $toolsCalledCount += $count;
542            }
543
544            // If total tool calls exceed 10, also disable tools
545            if ($toolsCalledCount > 10) {
546                $toolsCalled = true;
547            }
548
549            // Make another API call with tool responses
550            $data['messages'] = $messages;
551            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
552        }
553
554        // Throw exception for unexpected response format
555        throw new Exception('Unexpected API response format');
556    }
557
558    /**
559     * Load a prompt template from a DokuWiki page and replace placeholders
560     *
561     * Loads prompt templates from DokuWiki pages with IDs in the format
562     * dokullm:profiles:PROFILE:PROMPT_NAME
563     *
564     * The method implements a profile fallback mechanism:
565     * 1. First tries to load the prompt from the configured profile
566     * 2. If not found, falls back to default prompts
567     * 3. Throws an exception if neither is available
568     *
569     * After loading the prompt, it scans for placeholders and automatically
570     * adds missing ones with appropriate values before replacing all placeholders.
571     *
572     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
573     * @param array $variables Associative array of placeholder => value pairs
574     * @return string The processed prompt with placeholders replaced
575     * @throws Exception If the prompt page cannot be loaded from any profile
576     */
577    private function loadPrompt($promptName, $variables = [])
578    {
579        // Default to 'default' if profile is not set
580        if (empty($this->profile)) {
581            $this->profile = 'default';
582        }
583
584        // Construct the page ID for the prompt in the configured profile
585        $promptPageId = 'dokullm:profiles:' . $this->profile . ':' . $promptName;
586
587        // Try to get the content of the prompt page in the configured profile
588        $prompt = $this->getPageContent($promptPageId);
589
590        // If the profile-specific prompt doesn't exist, try default as fallback
591        if ($prompt === false && $this->profile !== 'default') {
592            $promptPageId = 'dokullm:profile:default:' . $promptName;
593            $prompt = $this->getPageContent($promptPageId);
594        }
595
596        // If still no prompt found, throw an exception
597        if ($prompt === false) {
598            throw new Exception('Prompt page not found: ' . $promptPageId);
599        }
600
601        // Find placeholders in the prompt
602        $placeholders = $this->findPlaceholders($prompt);
603
604        // Add missing placeholders with appropriate values
605        foreach ($placeholders as $placeholder) {
606            // Skip if already provided in variables
607            if (isset($variables[$placeholder])) {
608                continue;
609            }
610
611            // Add appropriate values for specific placeholders
612            switch ($placeholder) {
613                case 'template':
614                    // If we have a page_template in variables, use it
615                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
616                    break;
617
618                case 'snippets':
619                    $variables[$placeholder] = $this->enableChromaDB ? $this->getSnippets(10) : '( no examples )';
620                    break;
621
622                case 'examples':
623                    // If we have example page IDs in metadata, add examples content
624                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
625                    break;
626
627                case 'previous':
628                    // If we have a previous report page ID in metadata, add previous content
629                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
630
631                    // Add current and previous dates to metadata
632                    $variables['current_date'] = $this->getPageDate($this->pageId);
633                    $variables['previous_date'] = !empty($variables['page_previous']) ?
634                                                $this->getPageDate($variables['page_previous']) :
635                                                '';
636                    break;
637
638                default:
639                    // For other placeholders, leave them empty or set a default value
640                    $variables[$placeholder] = '';
641                    break;
642            }
643        }
644
645        // Replace placeholders with actual values
646        // Placeholders are in the format {placeholder_name}
647        foreach ($variables as $placeholder => $value) {
648            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
649        }
650
651        // Return the processed prompt
652        return $prompt;
653    }
654
655    /**
656     * Load system prompt with optional command-specific appendage
657     *
658     * Loads the main system prompt and appends any command-specific system prompt
659     * if available.
660     *
661     * @param string $action The action/command name
662     * @param array $variables Associative array of placeholder => value pairs
663     * @return string The combined system prompt
664     */
665    private function loadSystemPrompt($action, $variables = [])
666    {
667        // Load system prompt which provides general instructions to the LLM
668        $systemPrompt = $this->loadPrompt('system', $variables);
669
670        // Check if there's a command-specific system prompt appendage
671        if (!empty($action)) {
672            try {
673                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
674                if ($commandSystemPrompt !== false) {
675                    $systemPrompt .= "\n" . $commandSystemPrompt;
676                }
677            } catch (Exception $e) {
678                // Ignore exceptions when loading command-specific system prompt
679                // This allows the main system prompt to still be used
680            }
681        }
682
683        return $systemPrompt;
684    }
685
686    /**
687     * Get the content of a DokuWiki page
688     *
689     * Retrieves the raw content of a DokuWiki page by its ID.
690     * Used for loading template and example page content for context.
691     *
692     * @param string $pageId The page ID to retrieve
693     * @return string|false The page content or false if not found/readable
694     */
695    public function getPageContent($pageId)
696    {
697        // Convert page ID to file path
698        $pageFile = wikiFN($pageId);
699
700        // Check if file exists and is readable
701        if (file_exists($pageFile) && is_readable($pageFile)) {
702            return file_get_contents($pageFile);
703        }
704
705        return false;
706    }
707
708    /**
709     * Extract date from page ID or file timestamp
710     *
711     * Attempts to extract a date in YYmmdd format from the page ID.
712     * If not found, uses the file's last modification timestamp.
713     *
714     * @param string $pageId Optional page ID to extract date from (defaults to current page)
715     * @return string Formatted date string (YYYY-MM-DD)
716     */
717    private function getPageDate($pageId = null)
718    {
719        // Use provided page ID or current page ID
720        $targetPageId = $pageId ?: $this->pageId;
721
722        // Try to extract date from page ID (looking for YYmmdd pattern)
723        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
724            // Convert YYmmdd to YYYY-MM-DD
725            $year = $matches[1];
726            $month = $matches[2];
727            $day = $matches[3];
728
729            // Assume 20xx for years 00-69, 19xx for years 70-99
730            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
731
732            return $fullYear . '-' . $month . '-' . $day;
733        }
734
735        // Fallback to file timestamp
736        $pageFile = wikiFN($targetPageId);
737        if (file_exists($pageFile)) {
738            $timestamp = filemtime($pageFile);
739            return date('Y-m-d', $timestamp);
740        }
741
742        // Return empty string if no date can be determined
743        return '';
744    }
745
746    /**
747     * Get current text
748     *
749     * Retrieves the current text stored from the process function.
750     *
751     * @return string The current text
752     */
753    private function getCurrentText()
754    {
755        return $this->currentText;
756    }
757
758    /**
759     * Scan text for placeholders
760     *
761     * Finds all placeholders in the format {placeholder_name} in the provided text
762     * and returns an array of unique placeholder names.
763     *
764     * @param string $text The text to scan for placeholders
765     * @return array List of unique placeholder names found in the text
766     */
767    public function findPlaceholders($text)
768    {
769        $placeholders = [];
770        $pattern = '/\{([^}]+)\}/';
771
772        if (preg_match_all($pattern, $text, $matches)) {
773            // Get unique placeholder names
774            $placeholders = array_unique($matches[1]);
775        }
776
777        return $placeholders;
778    }
779
780    /**
781     * Get template content for the current text
782     *
783     * Convenience function to retrieve template content. If a pageId is provided,
784     * retrieves content directly from that page. Otherwise, queries ChromaDB for
785     * a relevant template based on the current text.
786     *
787     * @param string|null $pageId Optional page ID to retrieve template from directly
788     * @return string The template content or empty string if not found
789     */
790    private function getTemplateContent($pageId = null)
791    {
792        // If pageId is provided, use it directly
793        if ($pageId !== null) {
794            $templateContent = $this->getPageContent($pageId);
795            if ($templateContent !== false) {
796                return $templateContent;
797            }
798        }
799
800        // If ChromaDB is disabled, return empty template
801        if (!$this->enableChromaDB) {
802            return '( no template )';
803        }
804
805        // Otherwise, get template suggestion for the current text
806        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
807        if (!empty($pageId)) {
808            $templateContent = $this->getPageContent($pageId[0]);
809            if ($templateContent !== false) {
810                return $templateContent;
811            }
812        }
813        return '( no template )';
814    }
815
816    /**
817     * Get snippets content for the current text
818     *
819     * Convenience function to retrieve relevant snippets for the current text.
820     * Queries ChromaDB for relevant snippets and returns them formatted.
821     *
822     * @param int $count Number of snippets to retrieve (default: 10)
823     * @return string Formatted snippets content or empty string if not found
824     */
825    private function getSnippets($count = 10)
826    {
827        // If ChromaDB is disabled, return empty snippets
828        if (!$this->enableChromaDB) {
829            return '( no examples )';
830        }
831
832        // Get example snippets for the current text
833        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
834        if (!empty($snippets)) {
835            $formattedSnippets = [];
836            foreach ($snippets as $index => $snippet) {
837                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
838            }
839            return implode("\n", $formattedSnippets);
840        }
841        return '( no examples )';
842    }
843
844    /**
845     * Get examples content from example page IDs
846     *
847     * Convenience function to retrieve content from example pages.
848     * Returns the content of each page packed in XML elements.
849     *
850     * @param array $exampleIds List of example page IDs
851     * @return string Formatted examples content or empty string if not found
852     */
853    private function getExamplesContent($exampleIds = [])
854    {
855        if (empty($exampleIds) || !is_array($exampleIds)) {
856            return '( no examples )';
857        }
858
859        $examplesContent = [];
860        foreach ($exampleIds as $index => $exampleId) {
861            $content = $this->getPageContent($exampleId);
862            if ($content !== false) {
863                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
864            }
865        }
866
867        return implode("\n", $examplesContent);
868    }
869
870    /**
871     * Get previous report content from previous page ID
872     *
873     * Convenience function to retrieve content from a previous report page.
874     * Returns the content of the previous page or a default message if not found.
875     *
876     * @param string $previousId Previous page ID
877     * @return string Previous report content or default message if not found
878     */
879    private function getPreviousContent($previousId = '')
880    {
881        if (empty($previousId)) {
882            return '( no previous report )';
883        }
884
885        $content = $this->getPageContent($previousId);
886        if ($content !== false) {
887            return $content;
888        }
889
890        return '( previous report not found )';
891    }
892
893    /**
894     * Get ChromaDB client with configuration
895     *
896     * Returns the ChromaDB client and collection name.
897     * If a client was passed in the constructor, use it. Otherwise, this method
898     * should not be called as it depends on getConf() which is not available.
899     *
900     * @return array Array containing the ChromaDB client and collection name
901     * @throws Exception If no ChromaDB client is available
902     */
903    private function getChromaDBClient()
904    {
905        // If we have a ChromaDB client passed in constructor, use it
906        if ($this->chromaClient !== null) {
907            // Get the collection name based on the page ID
908	    // FIXME
909            $chromaCollection = 'reports';
910            $pageId = $pageId;
911
912            if (!empty($this->pageId)) {
913                // Split the page ID by ':' and take the first part as collection name
914                $parts = explode(':', $this->pageId);
915                if (isset($parts[0]) && !empty($parts[0])) {
916                    // If the first part is 'playground', use the default collection
917                    // Otherwise, use the first part as the collection name
918                    if ($parts[0] === 'playground') {
919                        $chromaCollection = '';
920                    } else {
921                        $chromaCollection = $parts[0];
922                    }
923                }
924            }
925
926            return [$this->chromaClient, $chromaCollection];
927        }
928
929        // If we don't have a ChromaDB client, we can't create one here
930        // because getConf() is not available in this context
931        throw new Exception('No ChromaDB client available');
932    }
933
934    /**
935     * Query ChromaDB for relevant documents
936     *
937     * Generates embeddings for the input text and queries ChromaDB for similar documents.
938     * Extracts modality from the current page ID to use as the collection name.
939     *
940     * @param string $text The text to find similar documents for
941     * @param int $limit Maximum number of documents to retrieve (default: 5)
942     * @param array|null $where Optional filter conditions for metadata
943     * @return array List of document IDs
944     */
945    private function queryChromaDB($text, $limit = 5, $where = null)
946    {
947        try {
948            // Get ChromaDB client and collection name
949            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
950            // Query for similar documents
951            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
952
953            // Extract document IDs from results
954            $documentIds = [];
955            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
956                foreach ($results['ids'][0] as $id) {
957                    // Use the ChromaDB ID directly without conversion
958                    $documentIds[] = $id;
959                }
960            }
961
962            return $documentIds;
963        } catch (Exception $e) {
964            // Log error but don't fail the operation
965            error_log('ChromaDB query failed: ' . $e->getMessage());
966            return [];
967        }
968    }
969
970    /**
971     * Query ChromaDB for relevant documents and return text snippets
972     *
973     * Generates embeddings for the input text and queries ChromaDB for similar documents.
974     * Returns the actual text snippets instead of document IDs.
975     *
976     * @param string $text The text to find similar documents for
977     * @param int $limit Maximum number of documents to retrieve (default: 10)
978     * @param array|null $where Optional filter conditions for metadata
979     * @return array List of text snippets
980     */
981    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
982    {
983        try {
984            // Get ChromaDB client and collection name
985            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
986            // Query for similar documents
987            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
988
989            // Extract document texts from results
990            $snippets = [];
991            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
992                foreach ($results['documents'][0] as $document) {
993                    $snippets[] = $document;
994                }
995            }
996
997            return $snippets;
998        } catch (Exception $e) {
999            // Log error but don't fail the operation
1000            error_log('ChromaDB query failed: ' . $e->getMessage());
1001            return [];
1002        }
1003    }
1004
1005    /**
1006     * Query ChromaDB for a template document
1007     *
1008     * Generates embeddings for the input text and queries ChromaDB for a template document
1009     * by filtering with metadata 'template=true'.
1010     *
1011     * @param string $text The text to find a template for
1012     * @return array List of template document IDs (maximum 1)
1013     */
1014    public function queryChromaDBTemplate($text)
1015    {
1016        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1017
1018        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1019        if (!empty($templateIds)) {
1020            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1021        }
1022
1023        return $templateIds;
1024    }
1025
1026}
1027