xref: /plugin/dokullm/LlmClient.php (revision 72d9a73b4120a7805576b4701833811f8fbaed8f)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4/**
5 * LLM Client for the dokullm plugin
6 *
7 * This class provides methods to interact with an LLM API for various
8 * text processing tasks such as completion, rewriting, grammar correction,
9 * summarization, conclusion creation, text analysis, and custom prompts.
10 *
11 * The client handles:
12 * - API configuration and authentication
13 * - Prompt template loading and processing
14 * - Context-aware requests with metadata
15 * - DokuWiki page content retrieval
16 */
17
18// must be run within Dokuwiki
19if (!defined('DOKU_INC')) {
20    die();
21}
22
23/**
24 * LLM Client class for handling API communications
25 *
26 * Manages configuration settings and provides methods for various
27 * text processing operations through an LLM API.
28 * Implements caching for tool calls to avoid duplicate processing.
29 */
30class LlmClient
31{
32    /** @var string The API endpoint URL */
33    private $api_url;
34
35    /** @var array Cache for tool call results */
36    private $toolCallCache = [];
37
38    /** @var string Current text for tool usage */
39    private $currentText = '';
40
41    /** @var array Track tool call counts to prevent infinite loops */
42    private $toolCallCounts = [];
43
44    /** @var string The API authentication key */
45    private $api_key;
46
47    /** @var string The model identifier to use */
48    private $model;
49
50    /** @var int The request timeout in seconds */
51    private $timeout;
52
53    /** @var float The temperature setting for response randomness */
54    private $temperature;
55
56    /** @var float The top-p setting for nucleus sampling */
57    private $top_p;
58
59    /** @var int The top-k setting for token selection */
60    private $top_k;
61
62    /** @var float The min-p setting for minimum probability threshold */
63    private $min_p;
64
65    /** @var bool Whether to enable thinking in LLM responses */
66    private $think;
67
68    /** @var object|null ChromaDB client instance */
69    private $chromaClient;
70
71    /** @var string|null Page ID */
72    private $pageId;
73
74    /**
75     * Initialize the LLM client with configuration settings
76     *
77     * Retrieves configuration values from DokuWiki's configuration system
78     * for API URL, key, model, timeout, and LLM sampling parameters.
79     *
80     * Configuration values:
81     * - api_url: The LLM API endpoint URL
82     * - api_key: Authentication key for the API (optional)
83     * - model: The model identifier to use for requests
84     * - timeout: Request timeout in seconds
85     * - language: Language code for prompt templates
86     * - temperature: Temperature setting for response randomness (0.0-1.0)
87     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
88     * - top_k: Top-k setting (integer >= 1)
89     * - min_p: Minimum probability threshold (0.0-1.0)
90     * - think: Whether to enable thinking in LLM responses (boolean)
91     * - chromaClient: ChromaDB client instance (optional)
92     * - pageId: Page ID (optional)
93     */
94    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $language = null, $chromaClient = null, $pageId = null)
95    {
96        $this->api_url = $api_url;
97        $this->api_key = $api_key;
98        $this->model = $model;
99        $this->timeout = $timeout;
100        $this->temperature = $temperature;
101        $this->top_p = $top_p;
102        $this->top_k = $top_k;
103        $this->min_p = $min_p;
104        $this->think = $think;
105        $this->language = $language;
106        $this->chromaClient = $chromaClient;
107        $this->pageId = $pageId;
108    }
109
110
111
112    public function process($action, $text, $metadata = [], $useContext = true)
113    {
114        // Store the current text for tool usage
115        $this->currentText = $text;
116
117        // Add text, think and action to metadata
118        $metadata['text'] = $text;
119        $metadata['think'] = $this->think ? '/think' : '/no_think';
120        $metadata['action'] = $action;
121
122        // If we have 'template' in metadata, move it to 'page_template'
123        if (isset($metadata['template'])) {
124            $metadata['page_template'] = $metadata['template'];
125            unset($metadata['template']);
126        }
127
128        // If we have 'examples' in metadata, move it to 'page_examples'
129        if (isset($metadata['examples'])) {
130            $metadata['page_examples'] = $metadata['examples'];
131            unset($metadata['examples']);
132        }
133
134        // If we have 'previous' in metadata, move it to 'page_previous'
135        if (isset($metadata['previous'])) {
136            $metadata['page_previous'] = $metadata['previous'];
137            unset($metadata['previous']);
138        }
139
140        $prompt = $this->loadPrompt($action, $metadata);
141
142        return $this->callAPI($action, $prompt, $metadata, $useContext);
143    }
144
145    /**
146     * Process text with a custom user prompt
147     *
148     * Sends a custom prompt to the LLM along with the provided text.
149     *
150     * @param string $text The text to process
151     * @param string $customPrompt The custom prompt to use
152     * @param array $metadata Optional metadata containing template and examples
153     * @param bool $useContext Whether to include template and examples in the context (default: true)
154     * @return string The processed text
155     */
156    public function processCustomPrompt($text, $metadata = [], $useContext = true)
157    {
158        // Store the current text for tool usage
159        $this->currentText = $text;
160
161        // Format the prompt with the text and custom prompt
162        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
163
164        return $this->callAPI('custom', $prompt, $metadata, $useContext);
165    }
166
167    /**
168     * Get the list of available tools for the LLM
169     *
170     * Defines the tools that can be used by the LLM during processing.
171     *
172     * @return array List of tool definitions
173     */
174    private function getAvailableTools()
175    {
176        return [
177            [
178                'type' => 'function',
179                'function' => [
180                    'name' => 'get_document',
181                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
182                    'parameters' => [
183                        'type' => 'object',
184                        'properties' => [
185                            'id' => [
186                                'type' => 'string',
187                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
188                            ]
189                        ],
190                        'required' => ['id']
191                    ]
192                ]
193            ],
194            [
195                'type' => 'function',
196                'function' => [
197                    'name' => 'get_template',
198                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
199                    'parameters' => [
200                        'type' => 'object',
201                        'properties' => [
202                            'language' => [
203                                'type' => 'string',
204                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
205                                'default' => 'ro'
206                            ]
207                        ]
208                    ]
209                ]
210            ],
211            [
212                'type' => 'function',
213                'function' => [
214                    'name' => 'get_examples',
215                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
216                    'parameters' => [
217                        'type' => 'object',
218                        'properties' => [
219                            'count' => [
220                                'type' => 'integer',
221                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
222                                'default' => 5
223                            ]
224                        ]
225                    ]
226                ]
227            ]
228        ];
229    }
230
231    /**
232     * Call the LLM API with the specified prompt
233     *
234     * Makes an HTTP POST request to the configured API endpoint with
235     * the prompt and other parameters. Handles authentication if an
236     * API key is configured.
237     *
238     * The method constructs a conversation with system and user messages,
239     * including context information from metadata when available.
240     *
241     * Complex logic includes:
242     * 1. Loading and enhancing the system prompt with metadata context
243     * 2. Building the API request with model parameters
244     * 3. Handling authentication with API key if configured
245     * 4. Making the HTTP request with proper error handling
246     * 5. Parsing and validating the API response
247     * 6. Supporting tool usage with automatic tool calling when enabled
248     * 7. Implementing context enhancement with templates, examples, and snippets
249     *
250     * The context information includes:
251     * - Template content: Used as a starting point for the response
252     * - Example pages: Full content of specified example pages
253     * - Text snippets: Relevant text examples from ChromaDB
254     *
255     * When tools are enabled, the method supports automatic tool calling:
256     * - Tools can retrieve documents, templates, and examples as needed
257     * - Tool responses are cached to avoid duplicate calls with identical parameters
258     * - Infinite loop protection prevents excessive tool calls
259     *
260     * @param string $command The command name for loading command-specific system prompts
261     * @param string $prompt The prompt to send to the LLM as user message
262     * @param array $metadata Optional metadata containing template, examples, and snippets
263     * @param bool $useContext Whether to include template and examples in the context (default: true)
264     * @return string The response content from the LLM
265     * @throws Exception If the API request fails or returns unexpected format
266     */
267
268    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
269    {
270        // Load system prompt which provides general instructions to the LLM
271        $systemPrompt = $this->loadSystemPrompt($command, []);
272
273        // Enhance the prompt with context information from metadata
274        // This provides the LLM with additional context about templates and examples
275        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
276            $contextInfo = "\n\n<context>\n";
277
278            // Add template content if specified in metadata
279            if (!empty($metadata['template'])) {
280                $templateContent = $this->getPageContent($metadata['template']);
281                if ($templateContent !== false) {
282                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
283                }
284            }
285
286            // Add example pages content if specified in metadata
287            if (!empty($metadata['examples'])) {
288                $examplesContent = [];
289                foreach ($metadata['examples'] as $example) {
290                    $content = $this->getPageContent($example);
291                    if ($content !== false) {
292                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
293                    }
294                }
295                if (!empty($examplesContent)) {
296                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
297                }
298            }
299
300            // Add text snippets if specified in metadata
301            if (!empty($metadata['snippets'])) {
302                $snippetsContent = [];
303                foreach ($metadata['snippets'] as $index => $snippet) {
304                    // These are text snippets from ChromaDB
305                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
306                }
307                if (!empty($snippetsContent)) {
308                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
309                }
310            }
311
312            $contextInfo .= "\n</context>\n";
313
314            // Append context information to system prompt
315            $prompt = $contextInfo . "\n\n" . $prompt;
316        }
317
318        // Prepare API request data with model parameters
319        $data = [
320            'model' => $this->model,
321            'messages' => [
322                ['role' => 'system', 'content' => $systemPrompt],
323                ['role' => 'user', 'content' => $prompt]
324            ],
325            'max_tokens' => 6144,
326            'stream' => false,
327            'keep_alive' => '30m',
328            'think' => true
329        ];
330
331        // Add tools to the request only if useTools is true
332        if ($useTools) {
333            // Define available tools
334            $data['tools'] = $this->getAvailableTools();
335            $data['tool_choice'] = 'auto';
336            $data['parallel_tool_calls'] = false;
337        }
338
339        // Only add parameters if they are defined and not null
340        if ($this->temperature !== null) {
341            $data['temperature'] = $this->temperature;
342        }
343        if ($this->top_p !== null) {
344            $data['top_p'] = $this->top_p;
345        }
346        if ($this->top_k !== null) {
347            $data['top_k'] = $this->top_k;
348        }
349        if ($this->min_p !== null) {
350            $data['min_p'] = $this->min_p;
351        }
352
353        // Make an API call with tool responses
354        return $this->callAPIWithTools($data, false);
355    }
356
357    /**
358     * Handle tool calls from the LLM
359     *
360     * Processes tool calls made by the LLM and returns appropriate responses.
361     * Implements caching to avoid duplicate calls with identical parameters.
362     *
363     * @param array $toolCall The tool call data from the LLM
364     * @return array The tool response message
365     */
366    private function handleToolCall($toolCall)
367    {
368        $toolName = $toolCall['function']['name'];
369        $arguments = json_decode($toolCall['function']['arguments'], true);
370
371        // Create a cache key from the tool name and arguments
372        $cacheKey = md5($toolName . serialize($arguments));
373
374        // Check if we have a cached result for this tool call
375        if (isset($this->toolCallCache[$cacheKey])) {
376            // Return cached result and indicate it was found in cache
377            $toolResponse = $this->toolCallCache[$cacheKey];
378            // Update with current tool call ID
379            $toolResponse['tool_call_id'] = $toolCall['id'];
380            $toolResponse['cached'] = true; // Indicate this response was cached
381            return $toolResponse;
382        }
383
384        $toolResponse = [
385            'role' => 'tool',
386            'tool_call_id' => $toolCall['id'],
387            'cached' => false // Indicate this is a fresh response
388        ];
389
390        switch ($toolName) {
391            case 'get_document':
392                $documentId = $arguments['id'];
393                $content = $this->getPageContent($documentId);
394                if ($content === false) {
395                    $toolResponse['content'] = 'Document not found: ' . $documentId;
396                } else {
397                    $toolResponse['content'] = $content;
398                }
399                break;
400
401            case 'get_template':
402                // Get template content using the convenience function
403                $toolResponse['content'] = $this->getTemplateContent();
404                break;
405
406            case 'get_examples':
407                // Get examples content using the convenience function
408                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
409                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
410                break;
411
412            default:
413                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
414        }
415
416        // Cache the result for future calls with the same parameters
417        $cacheEntry = $toolResponse;
418        // Remove tool_call_id and cached flag from cache as they change per call
419        unset($cacheEntry['tool_call_id']);
420        unset($cacheEntry['cached']);
421        $this->toolCallCache[$cacheKey] = $cacheEntry;
422
423        return $toolResponse;
424    }
425
426    /**
427     * Make an API call with tool responses
428     *
429     * Sends a follow-up request to the LLM with tool responses.
430     * Implements complex logic for handling tool calls with caching and loop protection.
431     *
432     * Complex logic includes:
433     * 1. Making HTTP requests with proper authentication and error handling
434     * 2. Processing tool calls from the LLM response
435     * 3. Caching tool responses to avoid duplicate calls with identical parameters
436     * 4. Tracking tool call counts to prevent infinite loops
437     * 5. Implementing loop protection with call count limits
438     * 6. Handling recursive tool calls until final content is generated
439     *
440     * Loop protection works by:
441     * - Tracking individual tool call counts (max 3 per tool)
442     * - Tracking total tool calls (max 10 total)
443     * - Disabling tools when limits are exceeded to break potential loops
444     *
445     * @param array $data The API request data including messages with tool responses
446     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
447     * @param bool $useTools Whether to process tool calls (used for loop protection)
448     * @return string The final response content
449     */
450    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
451    {
452        // Set up HTTP headers, including authentication if API key is configured
453        $headers = [
454            'Content-Type: application/json'
455        ];
456
457        if (!empty($this->api_key)) {
458            $headers[] = 'Authorization: Bearer ' . $this->api_key;
459        }
460
461       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
462        if ($toolsCalled) {
463            unset($data['tools']);
464            unset($data['tool_choice']);
465        }
466
467        // Initialize and configure cURL for the API request
468        $ch = curl_init();
469        curl_setopt($ch, CURLOPT_URL, $this->api_url);
470        curl_setopt($ch, CURLOPT_POST, true);
471        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
472        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
473        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
474        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
475        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
476
477        // Execute the API request
478        $response = curl_exec($ch);
479        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
480        $error = curl_error($ch);
481        curl_close($ch);
482
483        // Handle cURL errors
484        if ($error) {
485            throw new Exception('API request failed: ' . $error);
486        }
487
488        // Handle HTTP errors
489        if ($httpCode !== 200) {
490            throw new Exception('API request failed with HTTP code: ' . $httpCode);
491        }
492
493        // Parse and validate the JSON response
494        $result = json_decode($response, true);
495
496        // Extract the content from the response if available
497        if (isset($result['choices'][0]['message']['content'])) {
498            $content = trim($result['choices'][0]['message']['content']);
499            // Reset tool call counts when we get final content
500            $this->toolCallCounts = [];
501            return $content;
502        }
503
504        // Handle tool calls if present
505        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
506            $toolCalls = $result['choices'][0]['message']['tool_calls'];
507            // Start with original messages
508            $messages = $data['messages'];
509            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
510            $assistantMessage = [];
511            foreach ($result['choices'][0]['message'] as $key => $value) {
512                if ($key !== 'content') {
513                    $assistantMessage[$key] = $value;
514                }
515            }
516            // Add assistant's message with tool calls
517            $messages[] = $assistantMessage;
518
519            // Process each tool call and track counts to prevent infinite loops
520            foreach ($toolCalls as $toolCall) {
521                $toolName = $toolCall['function']['name'];
522                // Increment tool call count
523                if (!isset($this->toolCallCounts[$toolName])) {
524                    $this->toolCallCounts[$toolName] = 0;
525                }
526                $this->toolCallCounts[$toolName]++;
527
528                $toolResponse = $this->handleToolCall($toolCall);
529                $messages[] = $toolResponse;
530            }
531
532            // Check if any tool has been called more than 3 times
533            $toolsCalledCount = 0;
534            foreach ($this->toolCallCounts as $count) {
535                if ($count > 3) {
536                    // If any tool called more than 3 times, disable tools to break loop
537                    $toolsCalled = true;
538                    break;
539                }
540                $toolsCalledCount += $count;
541            }
542
543            // If total tool calls exceed 10, also disable tools
544            if ($toolsCalledCount > 10) {
545                $toolsCalled = true;
546            }
547
548            // Make another API call with tool responses
549            $data['messages'] = $messages;
550            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
551        }
552
553        // Throw exception for unexpected response format
554        throw new Exception('Unexpected API response format');
555    }
556
557    /**
558     * Load a prompt template from a DokuWiki page and replace placeholders
559     *
560     * Loads prompt templates from DokuWiki pages with IDs in the format
561     * dokullm:prompts:LANGUAGE:PROMPT_NAME
562     *
563     * The method implements a language fallback mechanism:
564     * 1. First tries to load the prompt in the configured language
565     * 2. If not found, falls back to English prompts
566     * 3. Throws an exception if neither is available
567     *
568     * After loading the prompt, it scans for placeholders and automatically
569     * adds missing ones with appropriate values before replacing all placeholders.
570     *
571     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
572     * @param array $variables Associative array of placeholder => value pairs
573     * @return string The processed prompt with placeholders replaced
574     * @throws Exception If the prompt page cannot be loaded in any language
575     */
576    private function loadPrompt($promptName, $variables = [])
577    {
578        // Default to 'en' if language is 'default' or not set
579        if ($this->language === 'default' || empty($this->language)) {
580            $this->language = 'en';
581        }
582
583        // Construct the page ID for the prompt in the configured language
584        $promptPageId = 'dokullm:prompts:' . $this->language . ':' . $promptName;
585
586        // Try to get the content of the prompt page in the configured language
587        $prompt = $this->getPageContent($promptPageId);
588
589        // If the language-specific prompt doesn't exist, try English as fallback
590        if ($prompt === false && $this->language !== 'en') {
591            $promptPageId = 'dokullm:prompts:en:' . $promptName;
592            $prompt = $this->getPageContent($promptPageId);
593        }
594
595        // If still no prompt found, throw an exception
596        if ($prompt === false) {
597            throw new Exception('Prompt page not found: ' . $promptPageId);
598        }
599
600        // Find placeholders in the prompt
601        $placeholders = $this->findPlaceholders($prompt);
602
603        // Add missing placeholders with appropriate values
604        foreach ($placeholders as $placeholder) {
605            // Skip if already provided in variables
606            if (isset($variables[$placeholder])) {
607                continue;
608            }
609
610            // Add appropriate values for specific placeholders
611            switch ($placeholder) {
612                case 'template':
613                    // If we have a page_template in variables, use it
614                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
615                    break;
616
617                case 'snippets':
618                    $variables[$placeholder] = $this->getSnippets(10);
619                    break;
620
621                case 'examples':
622                    // If we have example page IDs in metadata, add examples content
623                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
624                    break;
625
626                case 'previous':
627                    // If we have a previous report page ID in metadata, add previous content
628                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
629
630                    // Add current and previous dates to metadata
631                    $variables['current_date'] = $this->getPageDate();
632                    $variables['previous_date'] = !empty($variables['page_previous']) ?
633                                                $this->getPageDate($variables['page_previous']) :
634                                                '';
635                    break;
636
637                default:
638                    // For other placeholders, leave them empty or set a default value
639                    $variables[$placeholder] = '';
640                    break;
641            }
642        }
643
644        // Replace placeholders with actual values
645        // Placeholders are in the format {placeholder_name}
646        foreach ($variables as $placeholder => $value) {
647            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
648        }
649
650        // Return the processed prompt
651        return $prompt;
652    }
653
654    /**
655     * Load system prompt with optional command-specific appendage
656     *
657     * Loads the main system prompt and appends any command-specific system prompt
658     * if available.
659     *
660     * @param string $action The action/command name
661     * @param array $variables Associative array of placeholder => value pairs
662     * @return string The combined system prompt
663     */
664    private function loadSystemPrompt($action, $variables = [])
665    {
666        // Load system prompt which provides general instructions to the LLM
667        $systemPrompt = $this->loadPrompt('system', $variables);
668
669        // Check if there's a command-specific system prompt appendage
670        if (!empty($action)) {
671            try {
672                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
673                if ($commandSystemPrompt !== false) {
674                    $systemPrompt .= "\n" . $commandSystemPrompt;
675                }
676            } catch (Exception $e) {
677                // Ignore exceptions when loading command-specific system prompt
678                // This allows the main system prompt to still be used
679            }
680        }
681
682        return $systemPrompt;
683    }
684
685    /**
686     * Get the content of a DokuWiki page
687     *
688     * Retrieves the raw content of a DokuWiki page by its ID.
689     * Used for loading template and example page content for context.
690     *
691     * @param string $pageId The page ID to retrieve
692     * @return string|false The page content or false if not found/readable
693     */
694    public function getPageContent($pageId)
695    {
696        // Convert page ID to file path
697        $pageFile = wikiFN($pageId);
698
699        // Check if file exists and is readable
700        if (file_exists($pageFile) && is_readable($pageFile)) {
701            return file_get_contents($pageFile);
702        }
703
704        return false;
705    }
706
707    /**
708     * Extract date from page ID or file timestamp
709     *
710     * Attempts to extract a date in YYmmdd format from the page ID.
711     * If not found, uses the file's last modification timestamp.
712     *
713     * @param string $pageId Optional page ID to extract date from (defaults to current page)
714     * @return string Formatted date string (YYYY-MM-DD)
715     */
716    private function getPageDate($pageId = null)
717    {
718        global $ID;
719
720        // Use provided page ID or current page ID
721        $targetPageId = $pageId ?: $ID;
722
723        // Try to extract date from page ID (looking for YYmmdd pattern)
724        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
725            // Convert YYmmdd to YYYY-MM-DD
726            $year = $matches[1];
727            $month = $matches[2];
728            $day = $matches[3];
729
730            // Assume 20xx for years 00-69, 19xx for years 70-99
731            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
732
733            return $fullYear . '-' . $month . '-' . $day;
734        }
735
736        // Fallback to file timestamp
737        $pageFile = wikiFN($targetPageId);
738        if (file_exists($pageFile)) {
739            $timestamp = filemtime($pageFile);
740            return date('Y-m-d', $timestamp);
741        }
742
743        // Return empty string if no date can be determined
744        return '';
745    }
746
747    /**
748     * Get current text
749     *
750     * Retrieves the current text stored from the process function.
751     *
752     * @return string The current text
753     */
754    private function getCurrentText()
755    {
756        return $this->currentText;
757    }
758
759    /**
760     * Scan text for placeholders
761     *
762     * Finds all placeholders in the format {placeholder_name} in the provided text
763     * and returns an array of unique placeholder names.
764     *
765     * @param string $text The text to scan for placeholders
766     * @return array List of unique placeholder names found in the text
767     */
768    public function findPlaceholders($text)
769    {
770        $placeholders = [];
771        $pattern = '/\{([^}]+)\}/';
772
773        if (preg_match_all($pattern, $text, $matches)) {
774            // Get unique placeholder names
775            $placeholders = array_unique($matches[1]);
776        }
777
778        return $placeholders;
779    }
780
781    /**
782     * Get template content for the current text
783     *
784     * Convenience function to retrieve template content. If a pageId is provided,
785     * retrieves content directly from that page. Otherwise, queries ChromaDB for
786     * a relevant template based on the current text.
787     *
788     * @param string|null $pageId Optional page ID to retrieve template from directly
789     * @return string The template content or empty string if not found
790     */
791    private function getTemplateContent($pageId = null)
792    {
793        // If pageId is provided, use it directly
794        if ($pageId !== null) {
795            $templateContent = $this->getPageContent($pageId);
796            if ($templateContent !== false) {
797                return $templateContent;
798            }
799        }
800
801        // Otherwise, get template suggestion for the current text
802        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
803        if (!empty($pageId)) {
804            $templateContent = $this->getPageContent($pageId[0]);
805            if ($templateContent !== false) {
806                return $templateContent;
807            }
808        }
809        return '( no template )';
810    }
811
812    /**
813     * Get snippets content for the current text
814     *
815     * Convenience function to retrieve relevant snippets for the current text.
816     * Queries ChromaDB for relevant snippets and returns them formatted.
817     *
818     * @param int $count Number of snippets to retrieve (default: 10)
819     * @return string Formatted snippets content or empty string if not found
820     */
821    private function getSnippets($count = 10)
822    {
823        // Get example snippets for the current text
824        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
825        if (!empty($snippets)) {
826            $formattedSnippets = [];
827            foreach ($snippets as $index => $snippet) {
828                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
829            }
830            return implode("\n", $formattedSnippets);
831        }
832        return '( no examples )';
833    }
834
835    /**
836     * Get examples content from example page IDs
837     *
838     * Convenience function to retrieve content from example pages.
839     * Returns the content of each page packed in XML elements.
840     *
841     * @param array $exampleIds List of example page IDs
842     * @return string Formatted examples content or empty string if not found
843     */
844    private function getExamplesContent($exampleIds = [])
845    {
846        if (empty($exampleIds) || !is_array($exampleIds)) {
847            return '( no examples )';
848        }
849
850        $examplesContent = [];
851        foreach ($exampleIds as $index => $exampleId) {
852            $content = $this->getPageContent($exampleId);
853            if ($content !== false) {
854                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
855            }
856        }
857
858        return implode("\n", $examplesContent);
859    }
860
861    /**
862     * Get previous report content from previous page ID
863     *
864     * Convenience function to retrieve content from a previous report page.
865     * Returns the content of the previous page or a default message if not found.
866     *
867     * @param string $previousId Previous page ID
868     * @return string Previous report content or default message if not found
869     */
870    private function getPreviousContent($previousId = '')
871    {
872        if (empty($previousId)) {
873            return '( no previous report )';
874        }
875
876        $content = $this->getPageContent($previousId);
877        if ($content !== false) {
878            return $content;
879        }
880
881        return '( previous report not found )';
882    }
883
884    /**
885     * Get ChromaDB client with configuration
886     *
887     * Returns the ChromaDB client and collection name.
888     * If a client was passed in the constructor, use it. Otherwise, this method
889     * should not be called as it depends on getConf() which is not available.
890     *
891     * @return array Array containing the ChromaDB client and collection name
892     * @throws Exception If no ChromaDB client is available
893     */
894    private function getChromaDBClient()
895    {
896        // If we have a ChromaDB client passed in constructor, use it
897        if ($this->chromaClient !== null) {
898            // Get the collection name based on the page ID
899            $chromaCollection = 'reports';
900            $pageId = $this->pageId;
901
902            if (!empty($pageId)) {
903                // Split the page ID by ':' and take the first part as collection name
904                $parts = explode(':', $pageId);
905                if (isset($parts[0]) && !empty($parts[0])) {
906                    // If the first part is 'playground', use the default collection
907                    // Otherwise, use the first part as the collection name
908                    if ($parts[0] === 'playground') {
909                        $chromaCollection = $pageId;
910                    } else {
911                        $chromaCollection = $parts[0];
912                    }
913                }
914            }
915
916            return [$this->chromaClient, $chromaCollection];
917        }
918
919        // If we don't have a ChromaDB client, we can't create one here
920        // because getConf() is not available in this context
921        throw new Exception('No ChromaDB client available');
922    }
923
924    /**
925     * Query ChromaDB for relevant documents
926     *
927     * Generates embeddings for the input text and queries ChromaDB for similar documents.
928     * Extracts modality from the current page ID to use as the collection name.
929     *
930     * @param string $text The text to find similar documents for
931     * @param int $limit Maximum number of documents to retrieve (default: 5)
932     * @param array|null $where Optional filter conditions for metadata
933     * @return array List of document IDs
934     */
935    private function queryChromaDB($text, $limit = 5, $where = null)
936    {
937        try {
938            // Get ChromaDB client and collection name
939            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
940            // Query for similar documents
941            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
942
943            // Extract document IDs from results
944            $documentIds = [];
945            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
946                foreach ($results['ids'][0] as $id) {
947                    // Use the ChromaDB ID directly without conversion
948                    $documentIds[] = $id;
949                }
950            }
951
952            return $documentIds;
953        } catch (Exception $e) {
954            // Log error but don't fail the operation
955            error_log('ChromaDB query failed: ' . $e->getMessage());
956            return [];
957        }
958    }
959
960    /**
961     * Query ChromaDB for relevant documents and return text snippets
962     *
963     * Generates embeddings for the input text and queries ChromaDB for similar documents.
964     * Returns the actual text snippets instead of document IDs.
965     *
966     * @param string $text The text to find similar documents for
967     * @param int $limit Maximum number of documents to retrieve (default: 10)
968     * @param array|null $where Optional filter conditions for metadata
969     * @return array List of text snippets
970     */
971    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
972    {
973        try {
974            // Get ChromaDB client and collection name
975            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
976            // Query for similar documents
977            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
978
979            // Extract document texts from results
980            $snippets = [];
981            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
982                foreach ($results['documents'][0] as $document) {
983                    $snippets[] = $document;
984                }
985            }
986
987            return $snippets;
988        } catch (Exception $e) {
989            // Log error but don't fail the operation
990            error_log('ChromaDB query failed: ' . $e->getMessage());
991            return [];
992        }
993    }
994
995    /**
996     * Query ChromaDB for a template document
997     *
998     * Generates embeddings for the input text and queries ChromaDB for a template document
999     * by filtering with metadata 'template=true'.
1000     *
1001     * @param string $text The text to find a template for
1002     * @return array List of template document IDs (maximum 1)
1003     */
1004    public function queryChromaDBTemplate($text)
1005    {
1006        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1007
1008        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1009        if (!empty($templateIds)) {
1010            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1011        }
1012
1013        return $templateIds;
1014    }
1015
1016}
1017