xref: /plugin/dokullm/LlmClient.php (revision 2de956786abaa433f5e92ab7a5535375f6c92298)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4/**
5 * LLM Client for the dokullm plugin
6 *
7 * This class provides methods to interact with an LLM API for various
8 * text processing tasks such as completion, rewriting, grammar correction,
9 * summarization, conclusion creation, text analysis, and custom prompts.
10 *
11 * The client handles:
12 * - API configuration and authentication
13 * - Prompt template loading and processing
14 * - Context-aware requests with metadata
15 * - DokuWiki page content retrieval
16 */
17
18// must be run within Dokuwiki
19if (!defined('DOKU_INC')) {
20    die();
21}
22
23/**
24 * LLM Client class for handling API communications
25 *
26 * Manages configuration settings and provides methods for various
27 * text processing operations through an LLM API.
28 * Implements caching for tool calls to avoid duplicate processing.
29 */
30class LlmClient
31{
32    /** @var string The API endpoint URL */
33    private $api_url;
34
35    /** @var array Cache for tool call results */
36    private $toolCallCache = [];
37
38    /** @var string Current text for tool usage */
39    private $currentText = '';
40
41    /** @var array Track tool call counts to prevent infinite loops */
42    private $toolCallCounts = [];
43
44    /** @var string The API authentication key */
45    private $api_key;
46
47    /** @var string The model identifier to use */
48    private $model;
49
50    /** @var int The request timeout in seconds */
51    private $timeout;
52
53    /** @var float The temperature setting for response randomness */
54    private $temperature;
55
56    /** @var float The top-p setting for nucleus sampling */
57    private $top_p;
58
59    /** @var int The top-k setting for token selection */
60    private $top_k;
61
62    /** @var float The min-p setting for minimum probability threshold */
63    private $min_p;
64
65    /** @var bool Whether to enable thinking in LLM responses */
66    private $think;
67
68    /** @var object|null ChromaDB client instance */
69    private $chromaClient;
70
71    /**
72     * Initialize the LLM client with configuration settings
73     *
74     * Retrieves configuration values from DokuWiki's configuration system
75     * for API URL, key, model, timeout, and LLM sampling parameters.
76     *
77     * Configuration values:
78     * - api_url: The LLM API endpoint URL
79     * - api_key: Authentication key for the API (optional)
80     * - model: The model identifier to use for requests
81     * - timeout: Request timeout in seconds
82     * - language: Language code for prompt templates
83     * - temperature: Temperature setting for response randomness (0.0-1.0)
84     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
85     * - top_k: Top-k setting (integer >= 1)
86     * - min_p: Minimum probability threshold (0.0-1.0)
87     * - think: Whether to enable thinking in LLM responses (boolean)
88     * - chromaClient: ChromaDB client instance (optional)
89     */
90    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $language = null, $chromaClient = null)
91    {
92        $this->api_url = $api_url;
93        $this->api_key = $api_key;
94        $this->model = $model;
95        $this->timeout = $timeout;
96        $this->temperature = $temperature;
97        $this->top_p = $top_p;
98        $this->top_k = $top_k;
99        $this->min_p = $min_p;
100        $this->think = $think;
101        $this->language = $language;
102        $this->chromaClient = $chromaClient;
103    }
104
105
106
107    public function process($action, $text, $metadata = [], $useContext = true)
108    {
109        // Store the current text for tool usage
110        $this->currentText = $text;
111
112        // Add text, think and action to metadata
113        $metadata['text'] = $text;
114        $metadata['think'] = $this->think ? '/think' : '/no_think';
115        $metadata['action'] = $action;
116
117        // If we have 'template' in metadata, move it to 'page_template'
118        if (isset($metadata['template'])) {
119            $metadata['page_template'] = $metadata['template'];
120            unset($metadata['template']);
121        }
122
123        // If we have 'examples' in metadata, move it to 'page_examples'
124        if (isset($metadata['examples'])) {
125            $metadata['page_examples'] = $metadata['examples'];
126            unset($metadata['examples']);
127        }
128
129        // If we have 'previous' in metadata, move it to 'page_previous'
130        if (isset($metadata['previous'])) {
131            $metadata['page_previous'] = $metadata['previous'];
132            unset($metadata['previous']);
133        }
134
135        $prompt = $this->loadPrompt($action, $metadata);
136
137        return $this->callAPI($action, $prompt, $metadata, $useContext);
138    }
139
140    /**
141     * Process text with a custom user prompt
142     *
143     * Sends a custom prompt to the LLM along with the provided text.
144     *
145     * @param string $text The text to process
146     * @param string $customPrompt The custom prompt to use
147     * @param array $metadata Optional metadata containing template and examples
148     * @param bool $useContext Whether to include template and examples in the context (default: true)
149     * @return string The processed text
150     */
151    public function processCustomPrompt($text, $metadata = [], $useContext = true)
152    {
153        // Store the current text for tool usage
154        $this->currentText = $text;
155
156        // Format the prompt with the text and custom prompt
157        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
158
159        return $this->callAPI('custom', $prompt, $metadata, $useContext);
160    }
161
162    /**
163     * Get the list of available tools for the LLM
164     *
165     * Defines the tools that can be used by the LLM during processing.
166     *
167     * @return array List of tool definitions
168     */
169    private function getAvailableTools()
170    {
171        return [
172            [
173                'type' => 'function',
174                'function' => [
175                    'name' => 'get_document',
176                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
177                    'parameters' => [
178                        'type' => 'object',
179                        'properties' => [
180                            'id' => [
181                                'type' => 'string',
182                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
183                            ]
184                        ],
185                        'required' => ['id']
186                    ]
187                ]
188            ],
189            [
190                'type' => 'function',
191                'function' => [
192                    'name' => 'get_template',
193                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
194                    'parameters' => [
195                        'type' => 'object',
196                        'properties' => [
197                            'language' => [
198                                'type' => 'string',
199                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
200                                'default' => 'ro'
201                            ]
202                        ]
203                    ]
204                ]
205            ],
206            [
207                'type' => 'function',
208                'function' => [
209                    'name' => 'get_examples',
210                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
211                    'parameters' => [
212                        'type' => 'object',
213                        'properties' => [
214                            'count' => [
215                                'type' => 'integer',
216                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
217                                'default' => 5
218                            ]
219                        ]
220                    ]
221                ]
222            ]
223        ];
224    }
225
226    /**
227     * Call the LLM API with the specified prompt
228     *
229     * Makes an HTTP POST request to the configured API endpoint with
230     * the prompt and other parameters. Handles authentication if an
231     * API key is configured.
232     *
233     * The method constructs a conversation with system and user messages,
234     * including context information from metadata when available.
235     *
236     * Complex logic includes:
237     * 1. Loading and enhancing the system prompt with metadata context
238     * 2. Building the API request with model parameters
239     * 3. Handling authentication with API key if configured
240     * 4. Making the HTTP request with proper error handling
241     * 5. Parsing and validating the API response
242     * 6. Supporting tool usage with automatic tool calling when enabled
243     * 7. Implementing context enhancement with templates, examples, and snippets
244     *
245     * The context information includes:
246     * - Template content: Used as a starting point for the response
247     * - Example pages: Full content of specified example pages
248     * - Text snippets: Relevant text examples from ChromaDB
249     *
250     * When tools are enabled, the method supports automatic tool calling:
251     * - Tools can retrieve documents, templates, and examples as needed
252     * - Tool responses are cached to avoid duplicate calls with identical parameters
253     * - Infinite loop protection prevents excessive tool calls
254     *
255     * @param string $command The command name for loading command-specific system prompts
256     * @param string $prompt The prompt to send to the LLM as user message
257     * @param array $metadata Optional metadata containing template, examples, and snippets
258     * @param bool $useContext Whether to include template and examples in the context (default: true)
259     * @return string The response content from the LLM
260     * @throws Exception If the API request fails or returns unexpected format
261     */
262
263    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
264    {
265        // Load system prompt which provides general instructions to the LLM
266        $systemPrompt = $this->loadSystemPrompt($command, []);
267
268        // Enhance the prompt with context information from metadata
269        // This provides the LLM with additional context about templates and examples
270        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
271            $contextInfo = "\n\n<context>\n";
272
273            // Add template content if specified in metadata
274            if (!empty($metadata['template'])) {
275                $templateContent = $this->getPageContent($metadata['template']);
276                if ($templateContent !== false) {
277                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
278                }
279            }
280
281            // Add example pages content if specified in metadata
282            if (!empty($metadata['examples'])) {
283                $examplesContent = [];
284                foreach ($metadata['examples'] as $example) {
285                    $content = $this->getPageContent($example);
286                    if ($content !== false) {
287                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
288                    }
289                }
290                if (!empty($examplesContent)) {
291                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
292                }
293            }
294
295            // Add text snippets if specified in metadata
296            if (!empty($metadata['snippets'])) {
297                $snippetsContent = [];
298                foreach ($metadata['snippets'] as $index => $snippet) {
299                    // These are text snippets from ChromaDB
300                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
301                }
302                if (!empty($snippetsContent)) {
303                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
304                }
305            }
306
307            $contextInfo .= "\n</context>\n";
308
309            // Append context information to system prompt
310            $prompt = $contextInfo . "\n\n" . $prompt;
311        }
312
313        // Prepare API request data with model parameters
314        $data = [
315            'model' => $this->model,
316            'messages' => [
317                ['role' => 'system', 'content' => $systemPrompt],
318                ['role' => 'user', 'content' => $prompt]
319            ],
320            'max_tokens' => 6144,
321            'stream' => false,
322            'keep_alive' => '30m',
323            'think' => true
324        ];
325
326        // Add tools to the request only if useTools is true
327        if ($useTools) {
328            // Define available tools
329            $data['tools'] = $this->getAvailableTools();
330            $data['tool_choice'] = 'auto';
331            $data['parallel_tool_calls'] = false;
332        }
333
334        // Only add parameters if they are defined and not null
335        if ($this->temperature !== null) {
336            $data['temperature'] = $this->temperature;
337        }
338        if ($this->top_p !== null) {
339            $data['top_p'] = $this->top_p;
340        }
341        if ($this->top_k !== null) {
342            $data['top_k'] = $this->top_k;
343        }
344        if ($this->min_p !== null) {
345            $data['min_p'] = $this->min_p;
346        }
347
348        // Make an API call with tool responses
349        return $this->callAPIWithTools($data, false);
350    }
351
352    /**
353     * Handle tool calls from the LLM
354     *
355     * Processes tool calls made by the LLM and returns appropriate responses.
356     * Implements caching to avoid duplicate calls with identical parameters.
357     *
358     * @param array $toolCall The tool call data from the LLM
359     * @return array The tool response message
360     */
361    private function handleToolCall($toolCall)
362    {
363        $toolName = $toolCall['function']['name'];
364        $arguments = json_decode($toolCall['function']['arguments'], true);
365
366        // Create a cache key from the tool name and arguments
367        $cacheKey = md5($toolName . serialize($arguments));
368
369        // Check if we have a cached result for this tool call
370        if (isset($this->toolCallCache[$cacheKey])) {
371            // Return cached result and indicate it was found in cache
372            $toolResponse = $this->toolCallCache[$cacheKey];
373            // Update with current tool call ID
374            $toolResponse['tool_call_id'] = $toolCall['id'];
375            $toolResponse['cached'] = true; // Indicate this response was cached
376            return $toolResponse;
377        }
378
379        $toolResponse = [
380            'role' => 'tool',
381            'tool_call_id' => $toolCall['id'],
382            'cached' => false // Indicate this is a fresh response
383        ];
384
385        switch ($toolName) {
386            case 'get_document':
387                $documentId = $arguments['id'];
388                $content = $this->getPageContent($documentId);
389                if ($content === false) {
390                    $toolResponse['content'] = 'Document not found: ' . $documentId;
391                } else {
392                    $toolResponse['content'] = $content;
393                }
394                break;
395
396            case 'get_template':
397                // Get template content using the convenience function
398                $toolResponse['content'] = $this->getTemplateContent();
399                break;
400
401            case 'get_examples':
402                // Get examples content using the convenience function
403                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
404                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
405                break;
406
407            default:
408                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
409        }
410
411        // Cache the result for future calls with the same parameters
412        $cacheEntry = $toolResponse;
413        // Remove tool_call_id and cached flag from cache as they change per call
414        unset($cacheEntry['tool_call_id']);
415        unset($cacheEntry['cached']);
416        $this->toolCallCache[$cacheKey] = $cacheEntry;
417
418        return $toolResponse;
419    }
420
421    /**
422     * Make an API call with tool responses
423     *
424     * Sends a follow-up request to the LLM with tool responses.
425     * Implements complex logic for handling tool calls with caching and loop protection.
426     *
427     * Complex logic includes:
428     * 1. Making HTTP requests with proper authentication and error handling
429     * 2. Processing tool calls from the LLM response
430     * 3. Caching tool responses to avoid duplicate calls with identical parameters
431     * 4. Tracking tool call counts to prevent infinite loops
432     * 5. Implementing loop protection with call count limits
433     * 6. Handling recursive tool calls until final content is generated
434     *
435     * Loop protection works by:
436     * - Tracking individual tool call counts (max 3 per tool)
437     * - Tracking total tool calls (max 10 total)
438     * - Disabling tools when limits are exceeded to break potential loops
439     *
440     * @param array $data The API request data including messages with tool responses
441     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
442     * @param bool $useTools Whether to process tool calls (used for loop protection)
443     * @return string The final response content
444     */
445    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
446    {
447        // Set up HTTP headers, including authentication if API key is configured
448        $headers = [
449            'Content-Type: application/json'
450        ];
451
452        if (!empty($this->api_key)) {
453            $headers[] = 'Authorization: Bearer ' . $this->api_key;
454        }
455
456       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
457        if ($toolsCalled) {
458            unset($data['tools']);
459            unset($data['tool_choice']);
460        }
461
462        // Initialize and configure cURL for the API request
463        $ch = curl_init();
464        curl_setopt($ch, CURLOPT_URL, $this->api_url);
465        curl_setopt($ch, CURLOPT_POST, true);
466        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
467        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
468        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
469        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
470        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
471
472        // Execute the API request
473        $response = curl_exec($ch);
474        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
475        $error = curl_error($ch);
476        curl_close($ch);
477
478        // Handle cURL errors
479        if ($error) {
480            throw new Exception('API request failed: ' . $error);
481        }
482
483        // Handle HTTP errors
484        if ($httpCode !== 200) {
485            throw new Exception('API request failed with HTTP code: ' . $httpCode);
486        }
487
488        // Parse and validate the JSON response
489        $result = json_decode($response, true);
490
491        // Extract the content from the response if available
492        if (isset($result['choices'][0]['message']['content'])) {
493            $content = trim($result['choices'][0]['message']['content']);
494            // Reset tool call counts when we get final content
495            $this->toolCallCounts = [];
496            return $content;
497        }
498
499        // Handle tool calls if present
500        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
501            $toolCalls = $result['choices'][0]['message']['tool_calls'];
502            // Start with original messages
503            $messages = $data['messages'];
504            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
505            $assistantMessage = [];
506            foreach ($result['choices'][0]['message'] as $key => $value) {
507                if ($key !== 'content') {
508                    $assistantMessage[$key] = $value;
509                }
510            }
511            // Add assistant's message with tool calls
512            $messages[] = $assistantMessage;
513
514            // Process each tool call and track counts to prevent infinite loops
515            foreach ($toolCalls as $toolCall) {
516                $toolName = $toolCall['function']['name'];
517                // Increment tool call count
518                if (!isset($this->toolCallCounts[$toolName])) {
519                    $this->toolCallCounts[$toolName] = 0;
520                }
521                $this->toolCallCounts[$toolName]++;
522
523                $toolResponse = $this->handleToolCall($toolCall);
524                $messages[] = $toolResponse;
525            }
526
527            // Check if any tool has been called more than 3 times
528            $toolsCalledCount = 0;
529            foreach ($this->toolCallCounts as $count) {
530                if ($count > 3) {
531                    // If any tool called more than 3 times, disable tools to break loop
532                    $toolsCalled = true;
533                    break;
534                }
535                $toolsCalledCount += $count;
536            }
537
538            // If total tool calls exceed 10, also disable tools
539            if ($toolsCalledCount > 10) {
540                $toolsCalled = true;
541            }
542
543            // Make another API call with tool responses
544            $data['messages'] = $messages;
545            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
546        }
547
548        // Throw exception for unexpected response format
549        throw new Exception('Unexpected API response format');
550    }
551
552    /**
553     * Load a prompt template from a DokuWiki page and replace placeholders
554     *
555     * Loads prompt templates from DokuWiki pages with IDs in the format
556     * dokullm:prompts:LANGUAGE:PROMPT_NAME
557     *
558     * The method implements a language fallback mechanism:
559     * 1. First tries to load the prompt in the configured language
560     * 2. If not found, falls back to English prompts
561     * 3. Throws an exception if neither is available
562     *
563     * After loading the prompt, it scans for placeholders and automatically
564     * adds missing ones with appropriate values before replacing all placeholders.
565     *
566     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
567     * @param array $variables Associative array of placeholder => value pairs
568     * @return string The processed prompt with placeholders replaced
569     * @throws Exception If the prompt page cannot be loaded in any language
570     */
571    private function loadPrompt($promptName, $variables = [])
572    {
573        // Default to 'en' if language is 'default' or not set
574        if ($this->language === 'default' || empty($this->language)) {
575            $this->language = 'en';
576        }
577
578        // Construct the page ID for the prompt in the configured language
579        $promptPageId = 'dokullm:prompts:' . $this->language . ':' . $promptName;
580
581        // Try to get the content of the prompt page in the configured language
582        $prompt = $this->getPageContent($promptPageId);
583
584        // If the language-specific prompt doesn't exist, try English as fallback
585        if ($prompt === false && $this->language !== 'en') {
586            $promptPageId = 'dokullm:prompts:en:' . $promptName;
587            $prompt = $this->getPageContent($promptPageId);
588        }
589
590        // If still no prompt found, throw an exception
591        if ($prompt === false) {
592            throw new Exception('Prompt page not found: ' . $promptPageId);
593        }
594
595        // Find placeholders in the prompt
596        $placeholders = $this->findPlaceholders($prompt);
597
598        // Add missing placeholders with appropriate values
599        foreach ($placeholders as $placeholder) {
600            // Skip if already provided in variables
601            if (isset($variables[$placeholder])) {
602                continue;
603            }
604
605            // Add appropriate values for specific placeholders
606            switch ($placeholder) {
607                case 'template':
608                    // If we have a page_template in variables, use it
609                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
610                    break;
611
612                case 'snippets':
613                    $variables[$placeholder] = $this->getSnippets(10);
614                    break;
615
616                case 'examples':
617                    // If we have example page IDs in metadata, add examples content
618                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
619                    break;
620
621                case 'previous':
622                    // If we have a previous report page ID in metadata, add previous content
623                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
624
625                    // Add current and previous dates to metadata
626                    $variables['current_date'] = $this->getPageDate();
627                    $variables['previous_date'] = !empty($variables['page_previous']) ?
628                                                $this->getPageDate($variables['page_previous']) :
629                                                '';
630                    break;
631
632                default:
633                    // For other placeholders, leave them empty or set a default value
634                    $variables[$placeholder] = '';
635                    break;
636            }
637        }
638
639        // Replace placeholders with actual values
640        // Placeholders are in the format {placeholder_name}
641        foreach ($variables as $placeholder => $value) {
642            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
643        }
644
645        // Return the processed prompt
646        return $prompt;
647    }
648
649    /**
650     * Load system prompt with optional command-specific appendage
651     *
652     * Loads the main system prompt and appends any command-specific system prompt
653     * if available.
654     *
655     * @param string $action The action/command name
656     * @param array $variables Associative array of placeholder => value pairs
657     * @return string The combined system prompt
658     */
659    private function loadSystemPrompt($action, $variables = [])
660    {
661        // Load system prompt which provides general instructions to the LLM
662        $systemPrompt = $this->loadPrompt('system', $variables);
663
664        // Check if there's a command-specific system prompt appendage
665        if (!empty($action)) {
666            try {
667                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
668                if ($commandSystemPrompt !== false) {
669                    $systemPrompt .= "\n" . $commandSystemPrompt;
670                }
671            } catch (Exception $e) {
672                // Ignore exceptions when loading command-specific system prompt
673                // This allows the main system prompt to still be used
674            }
675        }
676
677        return $systemPrompt;
678    }
679
680    /**
681     * Get the content of a DokuWiki page
682     *
683     * Retrieves the raw content of a DokuWiki page by its ID.
684     * Used for loading template and example page content for context.
685     *
686     * @param string $pageId The page ID to retrieve
687     * @return string|false The page content or false if not found/readable
688     */
689    public function getPageContent($pageId)
690    {
691        // Convert page ID to file path
692        $pageFile = wikiFN($pageId);
693
694        // Check if file exists and is readable
695        if (file_exists($pageFile) && is_readable($pageFile)) {
696            return file_get_contents($pageFile);
697        }
698
699        return false;
700    }
701
702    /**
703     * Extract date from page ID or file timestamp
704     *
705     * Attempts to extract a date in YYmmdd format from the page ID.
706     * If not found, uses the file's last modification timestamp.
707     *
708     * @param string $pageId Optional page ID to extract date from (defaults to current page)
709     * @return string Formatted date string (YYYY-MM-DD)
710     */
711    private function getPageDate($pageId = null)
712    {
713        global $ID;
714
715        // Use provided page ID or current page ID
716        $targetPageId = $pageId ?: $ID;
717
718        // Try to extract date from page ID (looking for YYmmdd pattern)
719        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
720            // Convert YYmmdd to YYYY-MM-DD
721            $year = $matches[1];
722            $month = $matches[2];
723            $day = $matches[3];
724
725            // Assume 20xx for years 00-69, 19xx for years 70-99
726            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
727
728            return $fullYear . '-' . $month . '-' . $day;
729        }
730
731        // Fallback to file timestamp
732        $pageFile = wikiFN($targetPageId);
733        if (file_exists($pageFile)) {
734            $timestamp = filemtime($pageFile);
735            return date('Y-m-d', $timestamp);
736        }
737
738        // Return empty string if no date can be determined
739        return '';
740    }
741
742    /**
743     * Get current text
744     *
745     * Retrieves the current text stored from the process function.
746     *
747     * @return string The current text
748     */
749    private function getCurrentText()
750    {
751        return $this->currentText;
752    }
753
754    /**
755     * Scan text for placeholders
756     *
757     * Finds all placeholders in the format {placeholder_name} in the provided text
758     * and returns an array of unique placeholder names.
759     *
760     * @param string $text The text to scan for placeholders
761     * @return array List of unique placeholder names found in the text
762     */
763    public function findPlaceholders($text)
764    {
765        $placeholders = [];
766        $pattern = '/\{([^}]+)\}/';
767
768        if (preg_match_all($pattern, $text, $matches)) {
769            // Get unique placeholder names
770            $placeholders = array_unique($matches[1]);
771        }
772
773        return $placeholders;
774    }
775
776    /**
777     * Get template content for the current text
778     *
779     * Convenience function to retrieve template content. If a pageId is provided,
780     * retrieves content directly from that page. Otherwise, queries ChromaDB for
781     * a relevant template based on the current text.
782     *
783     * @param string|null $pageId Optional page ID to retrieve template from directly
784     * @return string The template content or empty string if not found
785     */
786    private function getTemplateContent($pageId = null)
787    {
788        // If pageId is provided, use it directly
789        if ($pageId !== null) {
790            $templateContent = $this->getPageContent($pageId);
791            if ($templateContent !== false) {
792                return $templateContent;
793            }
794        }
795
796        // Otherwise, get template suggestion for the current text
797        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
798        if (!empty($pageId)) {
799            $templateContent = $this->getPageContent($pageId[0]);
800            if ($templateContent !== false) {
801                return $templateContent;
802            }
803        }
804        return '( no template )';
805    }
806
807    /**
808     * Get snippets content for the current text
809     *
810     * Convenience function to retrieve relevant snippets for the current text.
811     * Queries ChromaDB for relevant snippets and returns them formatted.
812     *
813     * @param int $count Number of snippets to retrieve (default: 10)
814     * @return string Formatted snippets content or empty string if not found
815     */
816    private function getSnippets($count = 10)
817    {
818        // Get example snippets for the current text
819        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
820        if (!empty($snippets)) {
821            $formattedSnippets = [];
822            foreach ($snippets as $index => $snippet) {
823                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
824            }
825            return implode("\n", $formattedSnippets);
826        }
827        return '( no examples )';
828    }
829
830    /**
831     * Get examples content from example page IDs
832     *
833     * Convenience function to retrieve content from example pages.
834     * Returns the content of each page packed in XML elements.
835     *
836     * @param array $exampleIds List of example page IDs
837     * @return string Formatted examples content or empty string if not found
838     */
839    private function getExamplesContent($exampleIds = [])
840    {
841        if (empty($exampleIds) || !is_array($exampleIds)) {
842            return '( no examples )';
843        }
844
845        $examplesContent = [];
846        foreach ($exampleIds as $index => $exampleId) {
847            $content = $this->getPageContent($exampleId);
848            if ($content !== false) {
849                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
850            }
851        }
852
853        return implode("\n", $examplesContent);
854    }
855
856    /**
857     * Get previous report content from previous page ID
858     *
859     * Convenience function to retrieve content from a previous report page.
860     * Returns the content of the previous page or a default message if not found.
861     *
862     * @param string $previousId Previous page ID
863     * @return string Previous report content or default message if not found
864     */
865    private function getPreviousContent($previousId = '')
866    {
867        if (empty($previousId)) {
868            return '( no previous report )';
869        }
870
871        $content = $this->getPageContent($previousId);
872        if ($content !== false) {
873            return $content;
874        }
875
876        return '( previous report not found )';
877    }
878
879    /**
880     * Get ChromaDB client with configuration
881     *
882     * Returns the ChromaDB client and collection name.
883     * If a client was passed in the constructor, use it. Otherwise, this method
884     * should not be called as it depends on getConf() which is not available.
885     *
886     * @return array Array containing the ChromaDB client and collection name
887     * @throws Exception If no ChromaDB client is available
888     */
889    private function getChromaDBClient()
890    {
891        // If we have a ChromaDB client passed in constructor, use it
892        if ($this->chromaClient !== null) {
893            // Get the collection name based on the current page ID
894            $chromaDefaultCollection = 'documents'; // Default fallback
895            global $ID;
896            $chromaCollection = $chromaDefaultCollection;
897
898            if (!empty($ID)) {
899                // Split the page ID by ':' and take the first part as collection name
900                $parts = explode(':', $ID);
901                if (isset($parts[0]) && !empty($parts[0])) {
902                    // If the first part is 'playground', use the default collection
903                    // Otherwise, use the first part as the collection name
904                    if ($parts[0] === 'playground') {
905                        $chromaCollection = $chromaDefaultCollection;
906                    } else {
907                        $chromaCollection = $parts[0];
908                    }
909                }
910            }
911
912            return [$this->chromaClient, $chromaCollection];
913        }
914
915        // If we don't have a ChromaDB client, we can't create one here
916        // because getConf() is not available in this context
917        throw new Exception('No ChromaDB client available');
918    }
919
920    /**
921     * Query ChromaDB for relevant documents
922     *
923     * Generates embeddings for the input text and queries ChromaDB for similar documents.
924     * Extracts modality from the current page ID to use as the collection name.
925     *
926     * @param string $text The text to find similar documents for
927     * @param int $limit Maximum number of documents to retrieve (default: 5)
928     * @param array|null $where Optional filter conditions for metadata
929     * @return array List of document IDs
930     */
931    private function queryChromaDB($text, $limit = 5, $where = null)
932    {
933        try {
934            // Get ChromaDB client and collection name
935            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
936            // Query for similar documents
937            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
938
939            // Extract document IDs from results
940            $documentIds = [];
941            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
942                foreach ($results['ids'][0] as $id) {
943                    // Use the ChromaDB ID directly without conversion
944                    $documentIds[] = $id;
945                }
946            }
947
948            return $documentIds;
949        } catch (Exception $e) {
950            // Log error but don't fail the operation
951            error_log('ChromaDB query failed: ' . $e->getMessage());
952            return [];
953        }
954    }
955
956    /**
957     * Query ChromaDB for relevant documents and return text snippets
958     *
959     * Generates embeddings for the input text and queries ChromaDB for similar documents.
960     * Returns the actual text snippets instead of document IDs.
961     *
962     * @param string $text The text to find similar documents for
963     * @param int $limit Maximum number of documents to retrieve (default: 10)
964     * @param array|null $where Optional filter conditions for metadata
965     * @return array List of text snippets
966     */
967    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
968    {
969        try {
970            // Get ChromaDB client and collection name
971            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
972            // Query for similar documents
973            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
974
975            // Extract document texts from results
976            $snippets = [];
977            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
978                foreach ($results['documents'][0] as $document) {
979                    $snippets[] = $document;
980                }
981            }
982
983            return $snippets;
984        } catch (Exception $e) {
985            // Log error but don't fail the operation
986            error_log('ChromaDB query failed: ' . $e->getMessage());
987            return [];
988        }
989    }
990
991    /**
992     * Query ChromaDB for a template document
993     *
994     * Generates embeddings for the input text and queries ChromaDB for a template document
995     * by filtering with metadata 'template=true'.
996     *
997     * @param string $text The text to find a template for
998     * @return array List of template document IDs (maximum 1)
999     */
1000    public function queryChromaDBTemplate($text)
1001    {
1002        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1003
1004        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1005        if (!empty($templateIds)) {
1006            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1007        }
1008
1009        return $templateIds;
1010    }
1011
1012}
1013