xref: /plugin/dokullm/LlmClient.php (revision e2481ee1ee522bf07dcc8cb05b808a10fecfa8d2)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4use Exception;
5
6/**
7 * LLM Client for the dokullm plugin
8 *
9 * This class provides methods to interact with an LLM API for various
10 * text processing tasks such as completion, rewriting, grammar correction,
11 * summarization, conclusion creation, text analysis, and custom prompts.
12 *
13 * The client handles:
14 * - API configuration and authentication
15 * - Prompt template loading and processing
16 * - Context-aware requests with metadata
17 * - DokuWiki page content retrieval
18 */
19
// Abort immediately when this file is loaded outside of a DokuWiki request
// (DOKU_INC is not defined in that case).
defined('DOKU_INC') || die();
24
25/**
26 * LLM Client class for handling API communications
27 *
28 * Manages configuration settings and provides methods for various
29 * text processing operations through an LLM API.
30 * Implements caching for tool calls to avoid duplicate processing.
31 */
32class LlmClient
33{
34    /** @var string The API endpoint URL */
35    private $api_url;
36
37    /** @var array Cache for tool call results */
38    private $toolCallCache = [];
39
40    /** @var string Current text for tool usage */
41    private $currentText = '';
42
43    /** @var array Track tool call counts to prevent infinite loops */
44    private $toolCallCounts = [];
45
46    /** @var string The API authentication key */
47    private $api_key;
48
49    /** @var string The model identifier to use */
50    private $model;
51
52    /** @var int The request timeout in seconds */
53    private $timeout;
54
55    /** @var float The temperature setting for response randomness */
56    private $temperature;
57
58    /** @var float The top-p setting for nucleus sampling */
59    private $top_p;
60
61    /** @var int The top-k setting for token selection */
62    private $top_k;
63
64    /** @var float The min-p setting for minimum probability threshold */
65    private $min_p;
66
67    /** @var bool Whether to enable thinking in LLM responses */
68    private $think;
69
70    /** @var object|null ChromaDB client instance */
71    private $chromaClient;
72
73    /** @var string|null Page ID */
74    private $pageId;
75
76    /**
77     * Initialize the LLM client with configuration settings
78     *
79     * Retrieves configuration values from DokuWiki's configuration system
80     * for API URL, key, model, timeout, and LLM sampling parameters.
81     *
82     * Configuration values:
83     * - api_url: The LLM API endpoint URL
84     * - api_key: Authentication key for the API (optional)
85     * - model: The model identifier to use for requests
86     * - timeout: Request timeout in seconds
87     * - profile: Profile for prompt templates
88     * - temperature: Temperature setting for response randomness (0.0-1.0)
89     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
90     * - top_k: Top-k setting (integer >= 1)
91     * - min_p: Minimum probability threshold (0.0-1.0)
92     * - think: Whether to enable thinking in LLM responses (boolean)
93     * - chromaClient: ChromaDB client instance (optional)
94     * - pageId: Page ID (optional)
95     */
96    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null, $profile = null, $chromaClient = null, $pageId = null)
97    {
98        $this->api_url = $api_url;
99        $this->api_key = $api_key;
100        $this->model = $model;
101        $this->timeout = $timeout;
102        $this->temperature = $temperature;
103        $this->top_p = $top_p;
104        $this->top_k = $top_k;
105        $this->min_p = $min_p;
106        $this->think = $think;
107        $this->profile = $profile;
108        $this->chromaClient = $chromaClient;
109        $this->pageId = $pageId;
110    }
111
112
113
114    public function process($action, $text, $metadata = [], $useContext = true)
115    {
116        // Store the current text for tool usage
117        $this->currentText = $text;
118
119        // Add text, think and action to metadata
120        $metadata['text'] = $text;
121        $metadata['think'] = $this->think ? '/think' : '/no_think';
122        $metadata['action'] = $action;
123
124        // If we have 'template' in metadata, move it to 'page_template'
125        if (isset($metadata['template'])) {
126            $metadata['page_template'] = $metadata['template'];
127            unset($metadata['template']);
128        }
129
130        // If we have 'examples' in metadata, move it to 'page_examples'
131        if (isset($metadata['examples'])) {
132            $metadata['page_examples'] = $metadata['examples'];
133            unset($metadata['examples']);
134        }
135
136        // If we have 'previous' in metadata, move it to 'page_previous'
137        if (isset($metadata['previous'])) {
138            $metadata['page_previous'] = $metadata['previous'];
139            unset($metadata['previous']);
140        }
141
142        $prompt = $this->loadPrompt($action, $metadata);
143
144        return $this->callAPI($action, $prompt, $metadata, $useContext);
145    }
146
147    /**
148     * Process text with a custom user prompt
149     *
150     * Sends a custom prompt to the LLM along with the provided text.
151     *
152     * @param string $text The text to process
153     * @param string $customPrompt The custom prompt to use
154     * @param array $metadata Optional metadata containing template and examples
155     * @param bool $useContext Whether to include template and examples in the context (default: true)
156     * @return string The processed text
157     */
158    public function processCustomPrompt($text, $metadata = [], $useContext = true)
159    {
160        // Store the current text for tool usage
161        $this->currentText = $text;
162
163        // Format the prompt with the text and custom prompt
164        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
165
166        return $this->callAPI('custom', $prompt, $metadata, $useContext);
167    }
168
169    /**
170     * Get the list of available tools for the LLM
171     *
172     * Defines the tools that can be used by the LLM during processing.
173     *
174     * @return array List of tool definitions
175     */
176    private function getAvailableTools()
177    {
178        return [
179            [
180                'type' => 'function',
181                'function' => [
182                    'name' => 'get_document',
183                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
184                    'parameters' => [
185                        'type' => 'object',
186                        'properties' => [
187                            'id' => [
188                                'type' => 'string',
189                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
190                            ]
191                        ],
192                        'required' => ['id']
193                    ]
194                ]
195            ],
196            [
197                'type' => 'function',
198                'function' => [
199                    'name' => 'get_template',
200                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
201                    'parameters' => [
202                        'type' => 'object',
203                        'properties' => [
204                            'type' => [
205                                'type' => 'string',
206                                'description' => 'The type of the template (e.g., "mri" for MRI reports, "daily" for daily reports).',
207                                'default' => ''
208                            ]
209                        ]
210                    ]
211                ]
212            ],
213            [
214                'type' => 'function',
215                'function' => [
216                    'name' => 'get_examples',
217                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
218                    'parameters' => [
219                        'type' => 'object',
220                        'properties' => [
221                            'count' => [
222                                'type' => 'integer',
223                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
224                                'default' => 5
225                            ]
226                        ]
227                    ]
228                ]
229            ]
230        ];
231    }
232
233    /**
234     * Call the LLM API with the specified prompt
235     *
236     * Makes an HTTP POST request to the configured API endpoint with
237     * the prompt and other parameters. Handles authentication if an
238     * API key is configured.
239     *
240     * The method constructs a conversation with system and user messages,
241     * including context information from metadata when available.
242     *
243     * Complex logic includes:
244     * 1. Loading and enhancing the system prompt with metadata context
245     * 2. Building the API request with model parameters
246     * 3. Handling authentication with API key if configured
247     * 4. Making the HTTP request with proper error handling
248     * 5. Parsing and validating the API response
249     * 6. Supporting tool usage with automatic tool calling when enabled
250     * 7. Implementing context enhancement with templates, examples, and snippets
251     *
252     * The context information includes:
253     * - Template content: Used as a starting point for the response
254     * - Example pages: Full content of specified example pages
255     * - Text snippets: Relevant text examples from ChromaDB
256     *
257     * When tools are enabled, the method supports automatic tool calling:
258     * - Tools can retrieve documents, templates, and examples as needed
259     * - Tool responses are cached to avoid duplicate calls with identical parameters
260     * - Infinite loop protection prevents excessive tool calls
261     *
262     * @param string $command The command name for loading command-specific system prompts
263     * @param string $prompt The prompt to send to the LLM as user message
264     * @param array $metadata Optional metadata containing template, examples, and snippets
265     * @param bool $useContext Whether to include template and examples in the context (default: true)
266     * @return string The response content from the LLM
267     * @throws Exception If the API request fails or returns unexpected format
268     */
269
270    private function callAPI($command, $prompt, $metadata = [], $useContext = true, $useTools = false)
271    {
272        // Load system prompt which provides general instructions to the LLM
273        $systemPrompt = $this->loadSystemPrompt($command, []);
274
275        // Enhance the prompt with context information from metadata
276        // This provides the LLM with additional context about templates and examples
277        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
278            $contextInfo = "\n\n<context>\n";
279
280            // Add template content if specified in metadata
281            if (!empty($metadata['template'])) {
282                $templateContent = $this->getPageContent($metadata['template']);
283                if ($templateContent !== false) {
284                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
285                }
286            }
287
288            // Add example pages content if specified in metadata
289            if (!empty($metadata['examples'])) {
290                $examplesContent = [];
291                foreach ($metadata['examples'] as $example) {
292                    $content = $this->getPageContent($example);
293                    if ($content !== false) {
294                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
295                    }
296                }
297                if (!empty($examplesContent)) {
298                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
299                }
300            }
301
302            // Add text snippets if specified in metadata
303            if (!empty($metadata['snippets'])) {
304                $snippetsContent = [];
305                foreach ($metadata['snippets'] as $index => $snippet) {
306                    // These are text snippets from ChromaDB
307                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
308                }
309                if (!empty($snippetsContent)) {
310                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
311                }
312            }
313
314            $contextInfo .= "\n</context>\n";
315
316            // Append context information to system prompt
317            $prompt = $contextInfo . "\n\n" . $prompt;
318        }
319
320        // Prepare API request data with model parameters
321        $data = [
322            'model' => $this->model,
323            'messages' => [
324                ['role' => 'system', 'content' => $systemPrompt],
325                ['role' => 'user', 'content' => $prompt]
326            ],
327            'max_tokens' => 6144,
328            'stream' => false,
329            'keep_alive' => '30m',
330            'think' => true
331        ];
332
333        // Add tools to the request only if useTools is true
334        if ($useTools) {
335            // Define available tools
336            $data['tools'] = $this->getAvailableTools();
337            $data['tool_choice'] = 'auto';
338            $data['parallel_tool_calls'] = false;
339        }
340
341        // Only add parameters if they are defined and not null
342        if ($this->temperature !== null) {
343            $data['temperature'] = $this->temperature;
344        }
345        if ($this->top_p !== null) {
346            $data['top_p'] = $this->top_p;
347        }
348        if ($this->top_k !== null) {
349            $data['top_k'] = $this->top_k;
350        }
351        if ($this->min_p !== null) {
352            $data['min_p'] = $this->min_p;
353        }
354
355        // Make an API call with tool responses
356        return $this->callAPIWithTools($data, false);
357    }
358
359    /**
360     * Handle tool calls from the LLM
361     *
362     * Processes tool calls made by the LLM and returns appropriate responses.
363     * Implements caching to avoid duplicate calls with identical parameters.
364     *
365     * @param array $toolCall The tool call data from the LLM
366     * @return array The tool response message
367     */
368    private function handleToolCall($toolCall)
369    {
370        $toolName = $toolCall['function']['name'];
371        $arguments = json_decode($toolCall['function']['arguments'], true);
372
373        // Create a cache key from the tool name and arguments
374        $cacheKey = md5($toolName . serialize($arguments));
375
376        // Check if we have a cached result for this tool call
377        if (isset($this->toolCallCache[$cacheKey])) {
378            // Return cached result and indicate it was found in cache
379            $toolResponse = $this->toolCallCache[$cacheKey];
380            // Update with current tool call ID
381            $toolResponse['tool_call_id'] = $toolCall['id'];
382            $toolResponse['cached'] = true; // Indicate this response was cached
383            return $toolResponse;
384        }
385
386        $toolResponse = [
387            'role' => 'tool',
388            'tool_call_id' => $toolCall['id'],
389            'cached' => false // Indicate this is a fresh response
390        ];
391
392        switch ($toolName) {
393            case 'get_document':
394                $documentId = $arguments['id'];
395                $content = $this->getPageContent($documentId);
396                if ($content === false) {
397                    $toolResponse['content'] = 'Document not found: ' . $documentId;
398                } else {
399                    $toolResponse['content'] = $content;
400                }
401                break;
402
403            case 'get_template':
404                // Get template content using the convenience function
405                $toolResponse['content'] = $this->getTemplateContent();
406                break;
407
408            case 'get_examples':
409                // Get examples content using the convenience function
410                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
411                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
412                break;
413
414            default:
415                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
416        }
417
418        // Cache the result for future calls with the same parameters
419        $cacheEntry = $toolResponse;
420        // Remove tool_call_id and cached flag from cache as they change per call
421        unset($cacheEntry['tool_call_id']);
422        unset($cacheEntry['cached']);
423        $this->toolCallCache[$cacheKey] = $cacheEntry;
424
425        return $toolResponse;
426    }
427
428    /**
429     * Make an API call with tool responses
430     *
431     * Sends a follow-up request to the LLM with tool responses.
432     * Implements complex logic for handling tool calls with caching and loop protection.
433     *
434     * Complex logic includes:
435     * 1. Making HTTP requests with proper authentication and error handling
436     * 2. Processing tool calls from the LLM response
437     * 3. Caching tool responses to avoid duplicate calls with identical parameters
438     * 4. Tracking tool call counts to prevent infinite loops
439     * 5. Implementing loop protection with call count limits
440     * 6. Handling recursive tool calls until final content is generated
441     *
442     * Loop protection works by:
443     * - Tracking individual tool call counts (max 3 per tool)
444     * - Tracking total tool calls (max 10 total)
445     * - Disabling tools when limits are exceeded to break potential loops
446     *
447     * @param array $data The API request data including messages with tool responses
448     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
449     * @param bool $useTools Whether to process tool calls (used for loop protection)
450     * @return string The final response content
451     */
452    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
453    {
454        // Set up HTTP headers, including authentication if API key is configured
455        $headers = [
456            'Content-Type: application/json'
457        ];
458
459        if (!empty($this->api_key)) {
460            $headers[] = 'Authorization: Bearer ' . $this->api_key;
461        }
462
463       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
464        if ($toolsCalled) {
465            unset($data['tools']);
466            unset($data['tool_choice']);
467        }
468
469        // Initialize and configure cURL for the API request
470        $ch = curl_init();
471        curl_setopt($ch, CURLOPT_URL, $this->api_url);
472        curl_setopt($ch, CURLOPT_POST, true);
473        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
474        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
475        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
476        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
477        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
478
479        // Execute the API request
480        $response = curl_exec($ch);
481        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
482        $error = curl_error($ch);
483        curl_close($ch);
484
485        // Handle cURL errors
486        if ($error) {
487            throw new Exception('API request failed: ' . $error);
488        }
489
490        // Handle HTTP errors
491        if ($httpCode !== 200) {
492            throw new Exception('API request failed with HTTP code: ' . $httpCode);
493        }
494
495        // Parse and validate the JSON response
496        $result = json_decode($response, true);
497
498        // Extract the content from the response if available
499        if (isset($result['choices'][0]['message']['content'])) {
500            $content = trim($result['choices'][0]['message']['content']);
501            // Reset tool call counts when we get final content
502            $this->toolCallCounts = [];
503            return $content;
504        }
505
506        // Handle tool calls if present
507        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
508            $toolCalls = $result['choices'][0]['message']['tool_calls'];
509            // Start with original messages
510            $messages = $data['messages'];
511            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
512            $assistantMessage = [];
513            foreach ($result['choices'][0]['message'] as $key => $value) {
514                if ($key !== 'content') {
515                    $assistantMessage[$key] = $value;
516                }
517            }
518            // Add assistant's message with tool calls
519            $messages[] = $assistantMessage;
520
521            // Process each tool call and track counts to prevent infinite loops
522            foreach ($toolCalls as $toolCall) {
523                $toolName = $toolCall['function']['name'];
524                // Increment tool call count
525                if (!isset($this->toolCallCounts[$toolName])) {
526                    $this->toolCallCounts[$toolName] = 0;
527                }
528                $this->toolCallCounts[$toolName]++;
529
530                $toolResponse = $this->handleToolCall($toolCall);
531                $messages[] = $toolResponse;
532            }
533
534            // Check if any tool has been called more than 3 times
535            $toolsCalledCount = 0;
536            foreach ($this->toolCallCounts as $count) {
537                if ($count > 3) {
538                    // If any tool called more than 3 times, disable tools to break loop
539                    $toolsCalled = true;
540                    break;
541                }
542                $toolsCalledCount += $count;
543            }
544
545            // If total tool calls exceed 10, also disable tools
546            if ($toolsCalledCount > 10) {
547                $toolsCalled = true;
548            }
549
550            // Make another API call with tool responses
551            $data['messages'] = $messages;
552            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
553        }
554
555        // Throw exception for unexpected response format
556        throw new Exception('Unexpected API response format');
557    }
558
559    /**
560     * Load a prompt template from a DokuWiki page and replace placeholders
561     *
562     * Loads prompt templates from DokuWiki pages with IDs in the format
563     * dokullm:profiles:PROFILE:PROMPT_NAME
564     *
565     * The method implements a profile fallback mechanism:
566     * 1. First tries to load the prompt from the configured profile
567     * 2. If not found, falls back to default prompts
568     * 3. Throws an exception if neither is available
569     *
570     * After loading the prompt, it scans for placeholders and automatically
571     * adds missing ones with appropriate values before replacing all placeholders.
572     *
573     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
574     * @param array $variables Associative array of placeholder => value pairs
575     * @return string The processed prompt with placeholders replaced
576     * @throws Exception If the prompt page cannot be loaded from any profile
577     */
578    private function loadPrompt($promptName, $variables = [])
579    {
580        // Default to 'default' if profile is not set
581        if (empty($this->profile)) {
582            $this->profile = 'default';
583        }
584
585        // Construct the page ID for the prompt in the configured profile
586        $promptPageId = 'dokullm:profiles:' . $this->profile . ':' . $promptName;
587
588        // Try to get the content of the prompt page in the configured profile
589        $prompt = $this->getPageContent($promptPageId);
590
591        // If the profile-specific prompt doesn't exist, try default as fallback
592        if ($prompt === false && $this->profile !== 'default') {
593            $promptPageId = 'dokullm:profile:default:' . $promptName;
594            $prompt = $this->getPageContent($promptPageId);
595        }
596
597        // If still no prompt found, throw an exception
598        if ($prompt === false) {
599            throw new Exception('Prompt page not found: ' . $promptPageId);
600        }
601
602        // Find placeholders in the prompt
603        $placeholders = $this->findPlaceholders($prompt);
604
605        // Add missing placeholders with appropriate values
606        foreach ($placeholders as $placeholder) {
607            // Skip if already provided in variables
608            if (isset($variables[$placeholder])) {
609                continue;
610            }
611
612            // Add appropriate values for specific placeholders
613            switch ($placeholder) {
614                case 'template':
615                    // If we have a page_template in variables, use it
616                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
617                    break;
618
619                case 'snippets':
620                    $variables[$placeholder] = $this->chromaClient !== null ? $this->getSnippets(10) : '( no examples )';
621                    break;
622
623                case 'examples':
624                    // If we have example page IDs in metadata, add examples content
625                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
626                    break;
627
628                case 'previous':
629                    // If we have a previous report page ID in metadata, add previous content
630                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
631
632                    // Add current and previous dates to metadata
633                    $variables['current_date'] = $this->getPageDate($this->pageId);
634                    $variables['previous_date'] = !empty($variables['page_previous']) ?
635                                                $this->getPageDate($variables['page_previous']) :
636                                                '';
637                    break;
638
639                default:
640                    // For other placeholders, leave them empty or set a default value
641                    $variables[$placeholder] = '';
642                    break;
643            }
644        }
645
646        // Replace placeholders with actual values
647        // Placeholders are in the format {placeholder_name}
648        foreach ($variables as $placeholder => $value) {
649            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
650        }
651
652        // Return the processed prompt
653        return $prompt;
654    }
655
656    /**
657     * Load system prompt with optional command-specific appendage
658     *
659     * Loads the main system prompt and appends any command-specific system prompt
660     * if available.
661     *
662     * @param string $action The action/command name
663     * @param array $variables Associative array of placeholder => value pairs
664     * @return string The combined system prompt
665     */
666    private function loadSystemPrompt($action, $variables = [])
667    {
668        // Load system prompt which provides general instructions to the LLM
669        $systemPrompt = $this->loadPrompt('system', $variables);
670
671        // Check if there's a command-specific system prompt appendage
672        if (!empty($action)) {
673            try {
674                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
675                if ($commandSystemPrompt !== false) {
676                    $systemPrompt .= "\n" . $commandSystemPrompt;
677                }
678            } catch (Exception $e) {
679                // Ignore exceptions when loading command-specific system prompt
680                // This allows the main system prompt to still be used
681            }
682        }
683
684        return $systemPrompt;
685    }
686
687    /**
688     * Get the content of a DokuWiki page
689     *
690     * Retrieves the raw content of a DokuWiki page by its ID.
691     * Used for loading template and example page content for context.
692     *
693     * @param string $pageId The page ID to retrieve
694     * @return string|false The page content or false if not found/readable
695     */
696    public function getPageContent($pageId)
697    {
698        // Convert page ID to file path
699        $pageFile = wikiFN($pageId);
700
701        // Check if file exists and is readable
702        if (file_exists($pageFile) && is_readable($pageFile)) {
703            return file_get_contents($pageFile);
704        }
705
706        return false;
707    }
708
709    /**
710     * Extract date from page ID or file timestamp
711     *
712     * Attempts to extract a date in YYmmdd format from the page ID.
713     * If not found, uses the file's last modification timestamp.
714     *
715     * @param string $pageId Optional page ID to extract date from (defaults to current page)
716     * @return string Formatted date string (YYYY-MM-DD)
717     */
718    private function getPageDate($pageId = null)
719    {
720        // Use provided page ID or current page ID
721        $targetPageId = $pageId ?: $this->pageId;
722
723        // Try to extract date from page ID (looking for YYmmdd pattern)
724        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
725            // Convert YYmmdd to YYYY-MM-DD
726            $year = $matches[1];
727            $month = $matches[2];
728            $day = $matches[3];
729
730            // Assume 20xx for years 00-69, 19xx for years 70-99
731            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
732
733            return $fullYear . '-' . $month . '-' . $day;
734        }
735
736        // Fallback to file timestamp
737        $pageFile = wikiFN($targetPageId);
738        if (file_exists($pageFile)) {
739            $timestamp = filemtime($pageFile);
740            return date('Y-m-d', $timestamp);
741        }
742
743        // Return empty string if no date can be determined
744        return '';
745    }
746
747    /**
748     * Get current text
749     *
750     * Retrieves the current text stored from the process function.
751     *
752     * @return string The current text
753     */
754    private function getCurrentText()
755    {
756        return $this->currentText;
757    }
758
759    /**
760     * Scan text for placeholders
761     *
762     * Finds all placeholders in the format {placeholder_name} in the provided text
763     * and returns an array of unique placeholder names.
764     *
765     * @param string $text The text to scan for placeholders
766     * @return array List of unique placeholder names found in the text
767     */
768    public function findPlaceholders($text)
769    {
770        $placeholders = [];
771        $pattern = '/\{([^}]+)\}/';
772
773        if (preg_match_all($pattern, $text, $matches)) {
774            // Get unique placeholder names
775            $placeholders = array_unique($matches[1]);
776        }
777
778        return $placeholders;
779    }
780
781    /**
782     * Get template content for the current text
783     *
784     * Convenience function to retrieve template content. If a pageId is provided,
785     * retrieves content directly from that page. Otherwise, queries ChromaDB for
786     * a relevant template based on the current text.
787     *
788     * @param string|null $pageId Optional page ID to retrieve template from directly
789     * @return string The template content or empty string if not found
790     */
791    private function getTemplateContent($pageId = null)
792    {
793        // If pageId is provided, use it directly
794        if ($pageId !== null) {
795            $templateContent = $this->getPageContent($pageId);
796            if ($templateContent !== false) {
797                return $templateContent;
798            }
799        }
800
801        // If ChromaDB is disabled, return empty template
802        if ($this->chromaClient === null) {
803            return '( no template )';
804        }
805
806        // Otherwise, get template suggestion for the current text
807        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
808        if (!empty($pageId)) {
809            $templateContent = $this->getPageContent($pageId[0]);
810            if ($templateContent !== false) {
811                return $templateContent;
812            }
813        }
814        return '( no template )';
815    }
816
817    /**
818     * Get snippets content for the current text
819     *
820     * Convenience function to retrieve relevant snippets for the current text.
821     * Queries ChromaDB for relevant snippets and returns them formatted.
822     *
823     * @param int $count Number of snippets to retrieve (default: 10)
824     * @return string Formatted snippets content or empty string if not found
825     */
826    private function getSnippets($count = 10)
827    {
828        // If ChromaDB is disabled, return empty snippets
829        if ($this->chromaClient === null) {
830            return '( no examples )';
831        }
832
833        // Get example snippets for the current text
834        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
835        if (!empty($snippets)) {
836            $formattedSnippets = [];
837            foreach ($snippets as $index => $snippet) {
838                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
839            }
840            return implode("\n", $formattedSnippets);
841        }
842        return '( no examples )';
843    }
844
845    /**
846     * Get examples content from example page IDs
847     *
848     * Convenience function to retrieve content from example pages.
849     * Returns the content of each page packed in XML elements.
850     *
851     * @param array $exampleIds List of example page IDs
852     * @return string Formatted examples content or empty string if not found
853     */
854    private function getExamplesContent($exampleIds = [])
855    {
856        if (empty($exampleIds) || !is_array($exampleIds)) {
857            return '( no examples )';
858        }
859
860        $examplesContent = [];
861        foreach ($exampleIds as $index => $exampleId) {
862            $content = $this->getPageContent($exampleId);
863            if ($content !== false) {
864                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
865            }
866        }
867
868        return implode("\n", $examplesContent);
869    }
870
871    /**
872     * Get previous report content from previous page ID
873     *
874     * Convenience function to retrieve content from a previous report page.
875     * Returns the content of the previous page or a default message if not found.
876     *
877     * @param string $previousId Previous page ID
878     * @return string Previous report content or default message if not found
879     */
880    private function getPreviousContent($previousId = '')
881    {
882        if (empty($previousId)) {
883            return '( no previous report )';
884        }
885
886        $content = $this->getPageContent($previousId);
887        if ($content !== false) {
888            return $content;
889        }
890
891        return '( previous report not found )';
892    }
893
894    /**
895     * Get ChromaDB client with configuration
896     *
897     * Returns the ChromaDB client and collection name.
898     * If a client was passed in the constructor, use it. Otherwise, this method
899     * should not be called as it depends on getConf() which is not available.
900     *
901     * @return array Array containing the ChromaDB client and collection name
902     * @throws Exception If no ChromaDB client is available
903     */
904    private function getChromaDBClient()
905    {
906        // If we have a ChromaDB client passed in constructor, use it
907        if ($this->chromaClient !== null) {
908            // Get the collection name based on the page ID
909	    // FIXME
910            $chromaCollection = 'reports';
911            $pageId = $pageId;
912
913            if (!empty($this->pageId)) {
914                // Split the page ID by ':' and take the first part as collection name
915                $parts = explode(':', $this->pageId);
916                if (isset($parts[0]) && !empty($parts[0])) {
917                    // If the first part is 'playground', use the default collection
918                    // Otherwise, use the first part as the collection name
919                    if ($parts[0] === 'playground') {
920                        $chromaCollection = '';
921                    } else {
922                        $chromaCollection = $parts[0];
923                    }
924                }
925            }
926
927            return [$this->chromaClient, $chromaCollection];
928        }
929
930        // If we don't have a ChromaDB client, we can't create one here
931        // because getConf() is not available in this context
932        throw new Exception('No ChromaDB client available');
933    }
934
935    /**
936     * Query ChromaDB for relevant documents
937     *
938     * Generates embeddings for the input text and queries ChromaDB for similar documents.
939     * Extracts modality from the current page ID to use as the collection name.
940     *
941     * @param string $text The text to find similar documents for
942     * @param int $limit Maximum number of documents to retrieve (default: 5)
943     * @param array|null $where Optional filter conditions for metadata
944     * @return array List of document IDs
945     */
946    private function queryChromaDB($text, $limit = 5, $where = null)
947    {
948        try {
949            // Get ChromaDB client and collection name
950            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
951            // Query for similar documents
952            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
953
954            // Extract document IDs from results
955            $documentIds = [];
956            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
957                foreach ($results['ids'][0] as $id) {
958                    // Use the ChromaDB ID directly without conversion
959                    $documentIds[] = $id;
960                }
961            }
962
963            return $documentIds;
964        } catch (Exception $e) {
965            // Log error but don't fail the operation
966            error_log('ChromaDB query failed: ' . $e->getMessage());
967            return [];
968        }
969    }
970
971    /**
972     * Query ChromaDB for relevant documents and return text snippets
973     *
974     * Generates embeddings for the input text and queries ChromaDB for similar documents.
975     * Returns the actual text snippets instead of document IDs.
976     *
977     * @param string $text The text to find similar documents for
978     * @param int $limit Maximum number of documents to retrieve (default: 10)
979     * @param array|null $where Optional filter conditions for metadata
980     * @return array List of text snippets
981     */
982    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
983    {
984        try {
985            // Get ChromaDB client and collection name
986            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
987            // Query for similar documents
988            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
989
990            // Extract document texts from results
991            $snippets = [];
992            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
993                foreach ($results['documents'][0] as $document) {
994                    $snippets[] = $document;
995                }
996            }
997
998            return $snippets;
999        } catch (Exception $e) {
1000            // Log error but don't fail the operation
1001            error_log('ChromaDB query failed: ' . $e->getMessage());
1002            return [];
1003        }
1004    }
1005
1006    /**
1007     * Query ChromaDB for a template document
1008     *
1009     * Generates embeddings for the input text and queries ChromaDB for a template document
1010     * by filtering with metadata 'template=true'.
1011     *
1012     * @param string $text The text to find a template for
1013     * @return array List of template document IDs (maximum 1)
1014     */
1015    public function queryChromaDBTemplate($text)
1016    {
1017        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1018
1019        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1020        if (!empty($templateIds)) {
1021            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1022        }
1023
1024        return $templateIds;
1025    }
1026
1027}
1028