xref: /plugin/dokullm/LlmClient.php (revision 40986135d739ccf5c15ff2ed7d324236e1d1e2f5)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4use Exception;
5
6/**
7 * LLM Client for the dokullm plugin
8 *
9 * This class provides methods to interact with an LLM API for various
10 * text processing tasks such as completion, rewriting, grammar correction,
11 * summarization, conclusion creation, text analysis, and custom prompts.
12 *
13 * The client handles:
14 * - API configuration and authentication
15 * - Prompt template loading and processing
16 * - Context-aware requests with metadata
17 * - DokuWiki page content retrieval
18 */
19
// Abort immediately unless this file is loaded from inside DokuWiki,
// which is signalled by the DOKU_INC constant being defined.
defined('DOKU_INC') || die();
24
25/**
26 * LLM Client class for handling API communications
27 *
28 * Manages configuration settings and provides methods for various
29 * text processing operations through an LLM API.
30 * Implements caching for tool calls to avoid duplicate processing.
31 */
32class LlmClient
33{
34    /** @var string The API endpoint URL */
35    private $api_url;
36
37    /** @var array Cache for tool call results */
38    private $toolCallCache = [];
39
40    /** @var string Current text for tool usage */
41    private $currentText = '';
42
43    /** @var array Track tool call counts to prevent infinite loops */
44    private $toolCallCounts = [];
45
46    /** @var string The API authentication key */
47    private $api_key;
48
49    /** @var string The model identifier to use */
50    private $model;
51
52    /** @var int The request timeout in seconds */
53    private $timeout;
54
55    /** @var float The temperature setting for response randomness */
56    private $temperature;
57
58    /** @var float The top-p setting for nucleus sampling */
59    private $top_p;
60
61    /** @var int The top-k setting for token selection */
62    private $top_k;
63
64    /** @var float The min-p setting for minimum probability threshold */
65    private $min_p;
66
67    /** @var bool Whether to enable thinking in LLM responses */
68    private $think;
69
70    /** @var object|null ChromaDB client instance */
71    private $chromaClient;
72
73    /** @var string|null Page ID */
74    private $pageId;
75
76    /**
77     * Initialize the LLM client with configuration settings
78     *
79     * Retrieves configuration values from DokuWiki's configuration system
80     * for API URL, key, model, timeout, and LLM sampling parameters.
81     *
82     * Configuration values:
83     * - api_url: The LLM API endpoint URL
84     * - api_key: Authentication key for the API (optional)
85     * - model: The model identifier to use for requests
86     * - timeout: Request timeout in seconds
87     * - profile: Profile for prompt templates
88     * - temperature: Temperature setting for response randomness (0.0-1.0)
89     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
90     * - top_k: Top-k setting (integer >= 1)
91     * - min_p: Minimum probability threshold (0.0-1.0)
92     * - think: Whether to enable thinking in LLM responses (boolean)
93     * - chromaClient: ChromaDB client instance (optional)
94     * - pageId: Page ID (optional)
95     */
96    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $profile = null, $chromaClient = null, $pageId = null)
97    {
98        $this->api_url = $api_url;
99        $this->api_key = $api_key;
100        $this->model = $model;
101        $this->timeout = $timeout;
102        $this->temperature = $temperature;
103        $this->top_p = $top_p;
104        $this->top_k = $top_k;
105        $this->min_p = $min_p;
106        $this->think = $think;
107        $this->profile = $profile;
108        $this->chromaClient = $chromaClient;
109        $this->pageId = $pageId;
110    }
111
112
113
114    public function process($action, $text, $metadata = [], $useContext = true)
115    {
116        // Store the current text for tool usage
117        $this->currentText = $text;
118
119        // Add text, think and action to metadata
120        $metadata['text'] = $text;
121        $metadata['think'] = $this->think ? '/think' : '/no_think';
122        $metadata['action'] = $action;
123
124        // If we have 'template' in metadata, move it to 'page_template'
125        if (isset($metadata['template'])) {
126            $metadata['page_template'] = $metadata['template'];
127            unset($metadata['template']);
128        }
129
130        // If we have 'examples' in metadata, move it to 'page_examples'
131        if (isset($metadata['examples'])) {
132            $metadata['page_examples'] = $metadata['examples'];
133            unset($metadata['examples']);
134        }
135
136        // If we have 'previous' in metadata, move it to 'page_previous'
137        if (isset($metadata['previous'])) {
138            $metadata['page_previous'] = $metadata['previous'];
139            unset($metadata['previous']);
140        }
141
142        $prompt = $this->loadPrompt($action, $metadata);
143
144        return $this->callAPI($action, $prompt, $metadata, $useContext);
145    }
146
147    /**
148     * Process text with a custom user prompt
149     *
150     * Sends a custom prompt to the LLM along with the provided text.
151     *
152     * @param string $text The text to process
153     * @param string $customPrompt The custom prompt to use
154     * @param array $metadata Optional metadata containing template and examples
155     * @param bool $useContext Whether to include template and examples in the context (default: true)
156     * @return string The processed text
157     */
158
159    /**
160     * Get the list of available tools for the LLM
161     *
162     * Defines the tools that can be used by the LLM during processing.
163     *
164     * @return array List of tool definitions
165     */
166    private function getAvailableTools()
167    {
168        return [
169            [
170                'type' => 'function',
171                'function' => [
172                    'name' => 'get_document',
173                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
174                    'parameters' => [
175                        'type' => 'object',
176                        'properties' => [
177                            'id' => [
178                                'type' => 'string',
179                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
180                            ]
181                        ],
182                        'required' => ['id']
183                    ]
184                ]
185            ],
186            [
187                'type' => 'function',
188                'function' => [
189                    'name' => 'get_template',
190                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
191                    'parameters' => [
192                        'type' => 'object',
193                        'properties' => [
194                            'type' => [
195                                'type' => 'string',
196                                'description' => 'The type of the template (e.g., "mri" for MRI reports, "daily" for daily reports).',
197                                'default' => ''
198                            ]
199                        ]
200                    ]
201                ]
202            ],
203            [
204                'type' => 'function',
205                'function' => [
206                    'name' => 'get_examples',
207                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
208                    'parameters' => [
209                        'type' => 'object',
210                        'properties' => [
211                            'count' => [
212                                'type' => 'integer',
213                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
214                                'default' => 5
215                            ]
216                        ]
217                    ]
218                ]
219            ]
220        ];
221    }
222
223    /**
224     * Call the LLM API with the specified prompt
225     *
226     * Makes an HTTP POST request to the configured API endpoint with
227     * the prompt and other parameters. Handles authentication if an
228     * API key is configured.
229     *
230     * The method constructs a conversation with system and user messages,
231     * including context information from metadata when available.
232     *
233     * Complex logic includes:
234     * 1. Loading and enhancing the system prompt with metadata context
235     * 2. Building the API request with model parameters
236     * 3. Handling authentication with API key if configured
237     * 4. Making the HTTP request with proper error handling
238     * 5. Parsing and validating the API response
239     * 6. Supporting tool usage with automatic tool calling when enabled
240     * 7. Implementing context enhancement with templates, examples, and snippets
241     *
242     * The context information includes:
243     * - Template content: Used as a starting point for the response
244     * - Example pages: Full content of specified example pages
245     * - Text snippets: Relevant text examples from ChromaDB
246     *
247     * When tools are enabled, the method supports automatic tool calling:
248     * - Tools can retrieve documents, templates, and examples as needed
249     * - Tool responses are cached to avoid duplicate calls with identical parameters
250     * - Infinite loop protection prevents excessive tool calls
251     *
252     * @param string $command The command name for loading command-specific system prompts
253     * @param string $prompt The prompt to send to the LLM as user message
254     * @param array $metadata Optional metadata containing template, examples, and snippets
255     * @param bool $useContext Whether to include template and examples in the context (default: true)
256     * @return string The response content from the LLM
257     * @throws Exception If the API request fails or returns unexpected format
258     */
259
260    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
261    {
262        // Load system prompt which provides general instructions to the LLM
263        $systemPrompt = $this->loadSystemPrompt($command, []);
264
265        // Enhance the prompt with context information from metadata
266        // This provides the LLM with additional context about templates and examples
267        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
268            $contextInfo = "\n\n<context>\n";
269
270            // Add template content if specified in metadata
271            if (!empty($metadata['template'])) {
272                $templateContent = $this->getPageContent($metadata['template']);
273                if ($templateContent !== false) {
274                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
275                }
276            }
277
278            // Add example pages content if specified in metadata
279            if (!empty($metadata['examples'])) {
280                $examplesContent = [];
281                foreach ($metadata['examples'] as $example) {
282                    $content = $this->getPageContent($example);
283                    if ($content !== false) {
284                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
285                    }
286                }
287                if (!empty($examplesContent)) {
288                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
289                }
290            }
291
292            // Add text snippets if specified in metadata
293            if (!empty($metadata['snippets'])) {
294                $snippetsContent = [];
295                foreach ($metadata['snippets'] as $index => $snippet) {
296                    // These are text snippets from ChromaDB
297                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
298                }
299                if (!empty($snippetsContent)) {
300                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
301                }
302            }
303
304            $contextInfo .= "\n</context>\n";
305
306            // Append context information to system prompt
307            $prompt = $contextInfo . "\n\n" . $prompt;
308        }
309
310        // Prepare API request data with model parameters
311        $data = [
312            'model' => $this->model,
313            'messages' => [
314                ['role' => 'system', 'content' => $systemPrompt],
315                ['role' => 'user', 'content' => $prompt]
316            ],
317            'max_tokens' => 6144,
318            'stream' => false,
319            'keep_alive' => '30m',
320            'think' => true
321        ];
322
323        // Add tools to the request only if useTools is true
324        if ($useTools) {
325            // Define available tools
326            $data['tools'] = $this->getAvailableTools();
327            $data['tool_choice'] = 'auto';
328            $data['parallel_tool_calls'] = false;
329        }
330
331        // Only add parameters if they are defined and not null
332        if ($this->temperature !== null) {
333            $data['temperature'] = $this->temperature;
334        }
335        if ($this->top_p !== null) {
336            $data['top_p'] = $this->top_p;
337        }
338        if ($this->top_k !== null) {
339            $data['top_k'] = $this->top_k;
340        }
341        if ($this->min_p !== null) {
342            $data['min_p'] = $this->min_p;
343        }
344
345        // Make an API call with tool responses
346        return $this->callAPIWithTools($data, false);
347    }
348
349    /**
350     * Handle tool calls from the LLM
351     *
352     * Processes tool calls made by the LLM and returns appropriate responses.
353     * Implements caching to avoid duplicate calls with identical parameters.
354     *
355     * @param array $toolCall The tool call data from the LLM
356     * @return array The tool response message
357     */
358    private function handleToolCall($toolCall)
359    {
360        $toolName = $toolCall['function']['name'];
361        $arguments = json_decode($toolCall['function']['arguments'], true);
362
363        // Create a cache key from the tool name and arguments
364        $cacheKey = md5($toolName . serialize($arguments));
365
366        // Check if we have a cached result for this tool call
367        if (isset($this->toolCallCache[$cacheKey])) {
368            // Return cached result and indicate it was found in cache
369            $toolResponse = $this->toolCallCache[$cacheKey];
370            // Update with current tool call ID
371            $toolResponse['tool_call_id'] = $toolCall['id'];
372            $toolResponse['cached'] = true; // Indicate this response was cached
373            return $toolResponse;
374        }
375
376        $toolResponse = [
377            'role' => 'tool',
378            'tool_call_id' => $toolCall['id'],
379            'cached' => false // Indicate this is a fresh response
380        ];
381
382        switch ($toolName) {
383            case 'get_document':
384                $documentId = $arguments['id'];
385                $content = $this->getPageContent($documentId);
386                if ($content === false) {
387                    $toolResponse['content'] = 'Document not found: ' . $documentId;
388                } else {
389                    $toolResponse['content'] = $content;
390                }
391                break;
392
393            case 'get_template':
394                // Get template content using the convenience function
395                $toolResponse['content'] = $this->getTemplateContent();
396                break;
397
398            case 'get_examples':
399                // Get examples content using the convenience function
400                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
401                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
402                break;
403
404            default:
405                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
406        }
407
408        // Cache the result for future calls with the same parameters
409        $cacheEntry = $toolResponse;
410        // Remove tool_call_id and cached flag from cache as they change per call
411        unset($cacheEntry['tool_call_id']);
412        unset($cacheEntry['cached']);
413        $this->toolCallCache[$cacheKey] = $cacheEntry;
414
415        return $toolResponse;
416    }
417
418    /**
419     * Make an API call with tool responses
420     *
421     * Sends a follow-up request to the LLM with tool responses.
422     * Implements complex logic for handling tool calls with caching and loop protection.
423     *
424     * Complex logic includes:
425     * 1. Making HTTP requests with proper authentication and error handling
426     * 2. Processing tool calls from the LLM response
427     * 3. Caching tool responses to avoid duplicate calls with identical parameters
428     * 4. Tracking tool call counts to prevent infinite loops
429     * 5. Implementing loop protection with call count limits
430     * 6. Handling recursive tool calls until final content is generated
431     *
432     * Loop protection works by:
433     * - Tracking individual tool call counts (max 3 per tool)
434     * - Tracking total tool calls (max 10 total)
435     * - Disabling tools when limits are exceeded to break potential loops
436     *
437     * @param array $data The API request data including messages with tool responses
438     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
439     * @param bool $useTools Whether to process tool calls (used for loop protection)
440     * @return string The final response content
441     */
442    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
443    {
444        // Set up HTTP headers, including authentication if API key is configured
445        $headers = [
446            'Content-Type: application/json'
447        ];
448
449        if (!empty($this->api_key)) {
450            $headers[] = 'Authorization: Bearer ' . $this->api_key;
451        }
452
453       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
454        if ($toolsCalled) {
455            unset($data['tools']);
456            unset($data['tool_choice']);
457        }
458
459        // Initialize and configure cURL for the API request
460        $ch = curl_init();
461        curl_setopt($ch, CURLOPT_URL, $this->api_url);
462        curl_setopt($ch, CURLOPT_POST, true);
463        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
464        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
465        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
466        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
467        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
468
469        // Execute the API request
470        $response = curl_exec($ch);
471        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
472        $error = curl_error($ch);
473        curl_close($ch);
474
475        // Handle cURL errors
476        if ($error) {
477            throw new Exception('API request failed: ' . $error);
478        }
479
480        // Handle HTTP errors
481        if ($httpCode !== 200) {
482            throw new Exception('API request failed with HTTP code: ' . $httpCode);
483        }
484
485        // Parse and validate the JSON response
486        $result = json_decode($response, true);
487
488        // Extract the content from the response if available
489        if (isset($result['choices'][0]['message']['content'])) {
490            $content = trim($result['choices'][0]['message']['content']);
491            // Reset tool call counts when we get final content
492            $this->toolCallCounts = [];
493            return $content;
494        }
495
496        // Handle tool calls if present
497        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
498            $toolCalls = $result['choices'][0]['message']['tool_calls'];
499            // Start with original messages
500            $messages = $data['messages'];
501            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
502            $assistantMessage = [];
503            foreach ($result['choices'][0]['message'] as $key => $value) {
504                if ($key !== 'content') {
505                    $assistantMessage[$key] = $value;
506                }
507            }
508            // Add assistant's message with tool calls
509            $messages[] = $assistantMessage;
510
511            // Process each tool call and track counts to prevent infinite loops
512            foreach ($toolCalls as $toolCall) {
513                $toolName = $toolCall['function']['name'];
514                // Increment tool call count
515                if (!isset($this->toolCallCounts[$toolName])) {
516                    $this->toolCallCounts[$toolName] = 0;
517                }
518                $this->toolCallCounts[$toolName]++;
519
520                $toolResponse = $this->handleToolCall($toolCall);
521                $messages[] = $toolResponse;
522            }
523
524            // Check if any tool has been called more than 3 times
525            $toolsCalledCount = 0;
526            foreach ($this->toolCallCounts as $count) {
527                if ($count > 3) {
528                    // If any tool called more than 3 times, disable tools to break loop
529                    $toolsCalled = true;
530                    break;
531                }
532                $toolsCalledCount += $count;
533            }
534
535            // If total tool calls exceed 10, also disable tools
536            if ($toolsCalledCount > 10) {
537                $toolsCalled = true;
538            }
539
540            // Make another API call with tool responses
541            $data['messages'] = $messages;
542            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
543        }
544
545        // Throw exception for unexpected response format
546        throw new Exception('Unexpected API response format');
547    }
548
549    /**
550     * Load a prompt template from a DokuWiki page and replace placeholders
551     *
552     * Loads prompt templates from DokuWiki pages with IDs in the format
553     * dokullm:profiles:PROFILE:PROMPT_NAME
554     *
555     * The method implements a profile fallback mechanism:
556     * 1. First tries to load the prompt from the configured profile
557     * 2. If not found, falls back to default prompts
558     * 3. Throws an exception if neither is available
559     *
560     * After loading the prompt, it scans for placeholders and automatically
561     * adds missing ones with appropriate values before replacing all placeholders.
562     *
563     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
564     * @param array $variables Associative array of placeholder => value pairs
565     * @return string The processed prompt with placeholders replaced
566     * @throws Exception If the prompt page cannot be loaded from any profile
567     */
568    private function loadPrompt($promptName, $variables = [])
569    {
570        // Default to 'default' if profile is not set
571        if (empty($this->profile)) {
572            $this->profile = 'default';
573        }
574
575        // Construct the page ID for the prompt in the configured profile
576        $promptPageId = 'dokullm:profiles:' . $this->profile . ':' . $promptName;
577
578        // Try to get the content of the prompt page in the configured profile
579        $prompt = $this->getPageContent($promptPageId);
580
581        // If the profile-specific prompt doesn't exist, try default as fallback
582        if ($prompt === false && $this->profile !== 'default') {
583            $promptPageId = 'dokullm:profile:default:' . $promptName;
584            $prompt = $this->getPageContent($promptPageId);
585        }
586
587        // If still no prompt found, throw an exception
588        if ($prompt === false) {
589            throw new Exception('Prompt page not found: ' . $promptPageId);
590        }
591
592        // Find placeholders in the prompt
593        $placeholders = $this->findPlaceholders($prompt);
594
595        // Add missing placeholders with appropriate values
596        foreach ($placeholders as $placeholder) {
597            // Skip if already provided in variables
598            if (isset($variables[$placeholder])) {
599                continue;
600            }
601
602            // Add appropriate values for specific placeholders
603            switch ($placeholder) {
604                case 'template':
605                    // If we have a page_template in variables, use it
606                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
607                    break;
608
609                case 'snippets':
610                    $variables[$placeholder] = $this->chromaClient !== null ? $this->getSnippets(10) : '( no examples )';
611                    break;
612
613                case 'examples':
614                    // If we have example page IDs in metadata, add examples content
615                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
616                    break;
617
618                case 'previous':
619                    // If we have a previous report page ID in metadata, add previous content
620                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
621
622                    // Add current and previous dates to metadata
623                    $variables['current_date'] = $this->getPageDate($this->pageId);
624                    $variables['previous_date'] = !empty($variables['page_previous']) ?
625                                                $this->getPageDate($variables['page_previous']) :
626                                                '';
627                    break;
628
629                case 'prompt':
630                    // Add the custom prompt value
631                    $variables[$placeholder] = isset($variables['prompt']) ? $variables['prompt'] : '';
632                    break;
633
634                default:
635                    // For other placeholders, leave them empty or set a default value
636                    $variables[$placeholder] = '';
637                    break;
638            }
639        }
640
641        // Replace placeholders with actual values
642        // Placeholders are in the format {placeholder_name}
643        foreach ($variables as $placeholder => $value) {
644            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
645        }
646
647        // Return the processed prompt
648        return $prompt;
649    }
650
651    /**
652     * Load system prompt with optional command-specific appendage
653     *
654     * Loads the main system prompt and appends any command-specific system prompt
655     * if available.
656     *
657     * @param string $action The action/command name
658     * @param array $variables Associative array of placeholder => value pairs
659     * @return string The combined system prompt
660     */
661    private function loadSystemPrompt($action, $variables = [])
662    {
663        // Load system prompt which provides general instructions to the LLM
664        $systemPrompt = $this->loadPrompt('system', $variables);
665
666        // Check if there's a command-specific system prompt appendage
667        if (!empty($action)) {
668            try {
669                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
670                if ($commandSystemPrompt !== false) {
671                    $systemPrompt .= "\n" . $commandSystemPrompt;
672                }
673            } catch (Exception $e) {
674                // Ignore exceptions when loading command-specific system prompt
675                // This allows the main system prompt to still be used
676            }
677        }
678
679        return $systemPrompt;
680    }
681
682    /**
683     * Get the content of a DokuWiki page
684     *
685     * Retrieves the raw content of a DokuWiki page by its ID.
686     * Used for loading template and example page content for context.
687     *
688     * @param string $pageId The page ID to retrieve
689     * @return string|false The page content or false if not found/readable
690     * @throws Exception If access is denied
691     */
692    public function getPageContent($pageId)
693    {
694        // Clean the ID and check ACL
695        $cleanId = cleanID($pageId);
696        if (auth_quickaclcheck($cleanId) < AUTH_READ) {
697            throw new Exception('You are not allowed to read this file');
698        }
699
700        // Convert page ID to file path
701        $pageFile = wikiFN($cleanId);
702
703        // Check if file exists and is readable
704        if (file_exists($pageFile) && is_readable($pageFile)) {
705            return file_get_contents($pageFile);
706        }
707
708        return false;
709    }
710
/**
 * Extract date from page ID or file timestamp
 *
 * The first run of six digits found in the page ID is read as YYmmdd.
 * It is only accepted when it forms a real calendar date; otherwise (or
 * when the ID contains no such run) the last modification time of the
 * page file is used instead.
 *
 * @param string $pageId Optional page ID to extract date from (defaults to current page)
 * @return string Formatted date string (YYYY-MM-DD), or an empty string if no date can be determined
 */
private function getPageDate($pageId = null)
{
    // Use provided page ID or current page ID
    $targetPageId = (string) ($pageId ?: $this->pageId);
    if ($targetPageId === '') {
        // Nothing to inspect and no page file to look up
        return '';
    }

    // Try to extract date from page ID (looking for YYmmdd pattern)
    if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
        // Assume 20xx for years 00-69, 19xx for years 70-99
        $shortYear = intval($matches[1]);
        $fullYear = ($shortYear <= 69 ? 2000 : 1900) + $shortYear;

        // Reject digit runs such as "123456" that are not valid dates,
        // so that they fall through to the file timestamp instead
        if (checkdate(intval($matches[2]), intval($matches[3]), $fullYear)) {
            return $fullYear . '-' . $matches[2] . '-' . $matches[3];
        }
    }

    // Fallback to file timestamp
    $pageFile = wikiFN($targetPageId);
    if (file_exists($pageFile)) {
        $timestamp = filemtime($pageFile);
        // filemtime() returns false on failure; do not format that as 1970-01-01
        if ($timestamp !== false) {
            return date('Y-m-d', $timestamp);
        }
    }

    // Return empty string if no date can be determined
    return '';
}
748
/**
 * Get current text
 *
 * Accessor for the text held in the $currentText property.
 *
 * @return string The current text
 */
private function getCurrentText()
{
    $text = $this->currentText;

    return $text;
}
760
/**
 * Scan text for placeholders
 *
 * Finds all placeholders in the format {placeholder_name} in the provided text
 * and returns the unique names in order of first appearance. The result is
 * re-indexed from 0, so it is a proper list even when the text repeats a
 * placeholder.
 *
 * @param string $text The text to scan for placeholders
 * @return array List of unique placeholder names found in the text
 */
public function findPlaceholders($text)
{
    // Nothing to scan; also avoids handing null to preg_match_all()
    if ($text === null || $text === '') {
        return [];
    }

    if (!preg_match_all('/\{([^}]+)\}/', $text, $matches)) {
        return [];
    }

    // array_unique() keeps the original keys, which leaves gaps after
    // duplicates are dropped; array_values() restores sequential keys
    return array_values(array_unique($matches[1]));
}
782
/**
 * Get template content for the current text
 *
 * Lookup order:
 *  1. If a page ID is given and that page can be read, its content is returned.
 *  2. Otherwise, if a ChromaDB client is configured, the first template ID
 *     suggested for the current text is loaded.
 *  3. If neither yields content, the placeholder "( no template )" is returned.
 *
 * @param string|null $pageId Optional page ID to retrieve template from directly
 * @return string The template content, or "( no template )" if none was found
 */
private function getTemplateContent($pageId = null)
{
    $fallback = '( no template )';

    // An explicitly requested page wins when it can be read
    if ($pageId !== null) {
        $direct = $this->getPageContent($pageId);
        if ($direct !== false) {
            return $direct;
        }
    }

    // Only ask ChromaDB for a suggestion when a client is available
    if ($this->chromaClient !== null) {
        $candidates = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($candidates)) {
            $suggested = $this->getPageContent($candidates[0]);
            if ($suggested !== false) {
                return $suggested;
            }
        }
    }

    return $fallback;
}
818
/**
 * Get snippets content for the current text
 *
 * Queries ChromaDB for snippets relevant to the current text and wraps each
 * one in an <example id="N"> element (N counted from 1), with the snippet on
 * its own lines between the opening and closing tag.
 *
 * @param int $count Number of snippets to retrieve (default: 10)
 * @return string Formatted snippets, or "( no examples )" if ChromaDB is disabled or nothing was found
 */
private function getSnippets($count = 10)
{
    // Without a ChromaDB client there is nothing to query
    if ($this->chromaClient === null) {
        return '( no examples )';
    }

    // Get example snippets for the current text
    $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
    if (empty($snippets)) {
        return '( no examples )';
    }

    $formattedSnippets = [];
    foreach ($snippets as $index => $snippet) {
        // "\n" must be double-quoted: in single quotes PHP emits a literal
        // backslash followed by "n" instead of a line break
        $formattedSnippets[] = '<example id="' . ($index + 1) . '">' . "\n" . $snippet . "\n" . '</example>';
    }

    return implode("\n", $formattedSnippets);
}
846
/**
 * Get examples content from example page IDs
 *
 * Loads each listed page through getPageContent() and wraps its content in an
 * <example_page source="ID"> element, with the content on its own lines.
 * Pages that cannot be loaded are skipped. An exception thrown by
 * getPageContent() (access denied) is not caught here.
 *
 * @param array $exampleIds List of example page IDs
 * @return string Formatted examples, or "( no examples )" if no IDs were given or no page could be loaded
 */
private function getExamplesContent($exampleIds = [])
{
    if (empty($exampleIds) || !is_array($exampleIds)) {
        return '( no examples )';
    }

    $examplesContent = [];
    foreach ($exampleIds as $exampleId) {
        $content = $this->getPageContent($exampleId);
        if ($content === false) {
            continue;
        }
        // "\n" must be double-quoted: in single quotes PHP emits a literal
        // backslash followed by "n" instead of a line break
        $examplesContent[] = '<example_page source="' . $exampleId . '">' . "\n" . $content . "\n" . '</example_page>';
    }

    // Keep the placeholder consistent when none of the pages could be loaded
    if (empty($examplesContent)) {
        return '( no examples )';
    }

    return implode("\n", $examplesContent);
}
872
/**
 * Get previous report content from previous page ID
 *
 * Loads the given page through getPageContent(). A placeholder message is
 * returned when no ID is supplied or when the page cannot be loaded.
 *
 * @param string $previousId Previous page ID
 * @return string Previous report content, "( no previous report )" when no ID
 *                was given, or "( previous report not found )" when loading failed
 */
private function getPreviousContent($previousId = '')
{
    // No previous report was referenced at all
    if (empty($previousId)) {
        return '( no previous report )';
    }

    $report = $this->getPageContent($previousId);

    return $report === false ? '( previous report not found )' : $report;
}
895
/**
 * Get ChromaDB client with configuration
 *
 * Returns the ChromaDB client that was stored on this object together with
 * the collection name to query. The collection name is derived from the
 * first ':'-separated segment of the current page ID:
 *  - no page ID, or an empty first segment: 'reports'
 *  - first segment 'playground': '' (empty collection name)
 *  - anything else: the first segment itself
 *
 * No client is created here; the original note states that getConf() is
 * not available in this context.
 *
 * @return array Array containing the ChromaDB client and collection name
 * @throws Exception If no ChromaDB client is available
 */
private function getChromaDBClient()
{
    // Without a stored client there is nothing to hand out
    if ($this->chromaClient === null) {
        throw new Exception('No ChromaDB client available');
    }

    // FIXME (carried over from the original code): collection selection
    $chromaCollection = 'reports';

    if (!empty($this->pageId)) {
        // The first segment of the page ID selects the collection
        $parts = explode(':', $this->pageId);
        if (!empty($parts[0])) {
            // NOTE(review): 'playground' maps to an empty collection name; presumably
            // the ChromaDB client treats '' as its default collection - confirm
            $chromaCollection = $parts[0] === 'playground' ? '' : $parts[0];
        }
    }

    return [$this->chromaClient, $chromaCollection];
}
936
/**
 * Query ChromaDB for relevant documents
 *
 * Passes the text to queryCollection() on the client obtained from
 * getChromaDBClient(), which also supplies the collection name, and
 * returns the IDs of the first result set. Any Exception raised along
 * the way is logged with error_log() and turned into an empty result.
 *
 * @param string $text The text to find similar documents for
 * @param int $limit Maximum number of documents to retrieve (default: 5)
 * @param array|null $where Optional filter conditions for metadata
 * @return array List of document IDs (empty on failure or when nothing matched)
 */
private function queryChromaDB($text, $limit = 5, $where = null)
{
    try {
        // Client and collection name come as a two-element array
        $handle = $this->getChromaDBClient();
        $client = $handle[0];
        $collection = $handle[1];

        $results = $client->queryCollection($collection, [$text], $limit, $where);

        // No usable ID list in the response
        if (!isset($results['ids'][0]) || !is_array($results['ids'][0])) {
            return [];
        }

        // ChromaDB IDs are returned as-is, re-indexed from 0
        return array_values($results['ids'][0]);
    } catch (Exception $e) {
        // Best effort: a failed lookup must not abort the caller
        error_log('ChromaDB query failed: ' . $e->getMessage());
        return [];
    }
}
972
/**
 * Query ChromaDB for relevant documents and return text snippets
 *
 * Works like queryChromaDB() but returns the document texts of the first
 * result set instead of the IDs. Any Exception raised along the way is
 * logged with error_log() and turned into an empty result.
 *
 * @param string $text The text to find similar documents for
 * @param int $limit Maximum number of documents to retrieve (default: 10)
 * @param array|null $where Optional filter conditions for metadata
 * @return array List of text snippets (empty on failure or when nothing matched)
 */
private function queryChromaDBSnippets($text, $limit = 10, $where = null)
{
    try {
        // Client and collection name come as a two-element array
        $handle = $this->getChromaDBClient();
        $client = $handle[0];
        $collection = $handle[1];

        $results = $client->queryCollection($collection, [$text], $limit, $where);

        // No usable document list in the response
        if (!isset($results['documents'][0]) || !is_array($results['documents'][0])) {
            return [];
        }

        // Document texts are returned as-is, re-indexed from 0
        return array_values($results['documents'][0]);
    } catch (Exception $e) {
        // Best effort: a failed lookup must not abort the caller
        error_log('ChromaDB query failed: ' . $e->getMessage());
        return [];
    }
}
1007
/**
 * Query ChromaDB for a template document
 *
 * Runs queryChromaDB() with a limit of one result and the metadata filter
 * ['type' => 'template'], then strips a trailing chunk suffix such as "@2"
 * from the returned ID so that it names the base document.
 *
 * @param string $text The text to find a template for
 * @return array List of template document IDs (maximum 1)
 */
public function queryChromaDBTemplate($text)
{
    $found = $this->queryChromaDB($text, 1, ['type' => 'template']);

    // Nothing matched: hand the empty result straight back
    if (empty($found)) {
        return $found;
    }

    // Drop the chunk number to obtain the base document ID
    $found[0] = preg_replace('/@\\d+$/', '', $found[0]);

    return $found;
}
1028
1029}
1030