xref: /plugin/dokullm/LlmClient.php (revision 8dbd6d13a4660f9898be7dfef01d5e569c9ff459)
1<?php
2namespace dokuwiki\plugin\dokullm;
3
4/**
5 * LLM Client for the dokullm plugin
6 *
7 * This class provides methods to interact with an LLM API for various
8 * text processing tasks such as completion, rewriting, grammar correction,
9 * summarization, conclusion creation, text analysis, and custom prompts.
10 *
11 * The client handles:
12 * - API configuration and authentication
13 * - Prompt template loading and processing
14 * - Context-aware requests with metadata
15 * - DokuWiki page content retrieval
16 */
17
// Abort immediately when this file is loaded outside of DokuWiki
defined('DOKU_INC') || die();
22
23
24
25/**
26 * LLM Client class for handling API communications
27 *
28 * Manages configuration settings and provides methods for various
29 * text processing operations through an LLM API.
30 * Implements caching for tool calls to avoid duplicate processing.
31 */
32class LlmClient
33{
34    /** @var string The API endpoint URL */
35    private $api_url;
36
37    /** @var array Cache for tool call results */
38    private $toolCallCache = [];
39
40    /** @var string Current text for tool usage */
41    private $currentText = '';
42
43    /** @var array Track tool call counts to prevent infinite loops */
44    private $toolCallCounts = [];
45
46    /** @var string The API authentication key */
47    private $api_key;
48
49    /** @var string The model identifier to use */
50    private $model;
51
52    /** @var int The request timeout in seconds */
53    private $timeout;
54
55    /** @var float The temperature setting for response randomness */
56    private $temperature;
57
58    /** @var float The top-p setting for nucleus sampling */
59    private $top_p;
60
61    /** @var int The top-k setting for token selection */
62    private $top_k;
63
64    /** @var float The min-p setting for minimum probability threshold */
65    private $min_p;
66
67    /** @var bool Whether to enable thinking in the LLM responses */
68    private $think;
69
70    /**
71     * Initialize the LLM client with configuration settings
72     *
73     * Retrieves configuration values from DokuWiki's configuration system
74     * for API URL, key, model, timeout, and LLM sampling parameters.
75     *
76     * Configuration values:
77     * - api_url: The LLM API endpoint URL
78     * - api_key: Authentication key for the API (optional)
79     * - model: The model identifier to use for requests
80     * - timeout: Request timeout in seconds
81     * - language: Language code for prompt templates
82     * - temperature: Temperature setting for response randomness (0.0-1.0)
83     * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
84     * - top_k: Top-k setting (integer >= 1)
85     * - min_p: Minimum probability threshold (0.0-1.0)
86     * - think: Whether to enable thinking in LLM responses (boolean)
87     */
88    public function __construct($api_url = null, $api_key = null, $model = null, $timeout = null, $temperature = null, $top_p = null, $top_k = null, $min_p = null, $think = null)
89    {
90        $this->api_url = $api_url ?? $this->getConf('api_url');
91        $this->api_key = $api_key ?? $this->getConf('api_key');
92        $this->model = $model ?? $this->getConf('model');
93        $this->timeout = $timeout ?? $this->getConf('timeout');
94        $this->temperature = $temperature ?? $this->getConf('temperature');
95        $this->top_p = $top_p ?? $this->getConf('top_p');
96        $this->top_k = $top_k ?? $this->getConf('top_k');
97        $this->min_p = $min_p ?? $this->getConf('min_p');
98        $this->think = $think ?? $this->getConf('think', false);
99    }
100
101    /**
102     * Get configuration value for the dokullm plugin
103     *
104     * @param string $key Configuration key
105     * @param mixed $default Default value if key not found
106     * @return mixed Configuration value
107     */
108    private function getConf($key, $default = null) {
109        global $conf;
110        return isset($conf['plugin']['dokullm'][$key]) ? $conf['plugin']['dokullm'][$key] : $default;
111    }
112
113
114
115    public function process($action, $text, $metadata = [], $useContext = true)
116    {
117        // Store the current text for tool usage
118        $this->currentText = $text;
119
120        // Add text, think and action to metadata
121        $metadata['text'] = $text;
122        $metadata['think'] = $this->think ? '/think' : '/no_think';
123        $metadata['action'] = $action;
124
125        // If we have 'template' in metadata, move it to 'page_template'
126        if (isset($metadata['template'])) {
127            $metadata['page_template'] = $metadata['template'];
128            unset($metadata['template']);
129        }
130
131        // If we have 'examples' in metadata, move it to 'page_examples'
132        if (isset($metadata['examples'])) {
133            $metadata['page_examples'] = $metadata['examples'];
134            unset($metadata['examples']);
135        }
136
137        // If we have 'previous' in metadata, move it to 'page_previous'
138        if (isset($metadata['previous'])) {
139            $metadata['page_previous'] = $metadata['previous'];
140            unset($metadata['previous']);
141        }
142
143        $prompt = $this->loadPrompt($action, $metadata);
144
145        return $this->callAPI($action, $prompt, $metadata, $useContext);
146    }
147
148
149
150    /**
151     * Create the provided text using the LLM
152     *
153     * Sends a prompt to the LLM asking it to create the given text.
154     * First queries ChromaDB for relevant documents to include as examples.
155     * If no template is defined, queries ChromaDB for a template.
156     *
157     * @param string $text The text to create
158     * @param array $metadata Optional metadata containing template, examples, and snippets
159     * @param bool $useContext Whether to include template and examples in the context (default: true)
160     * @return string The created text
161     */
162    public function createReport($text, $metadata = [], $useContext = true)
163    {
164        // Store the current text for tool usage
165        $this->currentText = $text;
166
167        // Check if tools should be used based on configuration
168        $useTools = $this->getConf('use_tools', false);
169
170        // Only try to find template and add snippets if tools are not enabled
171        // When tools are enabled, the LLM will call get_template and get_examples as needed
172        if (!$useTools) {
173            // If no template is defined, try to find one using ChromaDB
174            if (empty($metadata['template'])) {
175                $templateResult = $this->queryChromaDBTemplate($text);
176                if (!empty($templateResult)) {
177                    // Use the first result as template
178                    $metadata['template'] = $templateResult[0];
179                }
180            }
181
182            // Query ChromaDB for relevant documents to use as examples
183            $chromaResults = $this->queryChromaDBSnippets($text, 10);
184
185            // Add ChromaDB results to metadata as snippets
186            if (!empty($chromaResults)) {
187                // Merge with existing snippets
188                $metadata['snippets'] = array_merge(
189                    isset($metadata['snippets']) ? $metadata['snippets'] : [],
190                    $chromaResults
191                );
192            }
193        }
194
195        $think = $this->think ? '/think' : '/no_think';
196        $prompt = $this->loadPrompt('create', ['text' => $text, 'think' => $think]);
197
198        return $this->callAPI('create', $prompt, $metadata, $useContext);
199    }
200
201    /**
202     * Compare two texts and highlight differences
203     *
204     * Sends a prompt to the LLM asking it to compare two texts and
205     * highlight their similarities and differences.
206     *
207     * @param string $text The current text to compare
208     * @param array $metadata Optional metadata containing template, examples, and previous report reference
209     * @return string The comparison results
210     */
211    public function compareText($text, $metadata = [], $useContext = false)
212    {
213        // Store the current text for tool usage
214        $this->currentText = $text;
215
216        // Load previous report from metadata if specified
217        $previousText = '';
218        if (!empty($metadata['previous_report_page'])) {
219            $previousText = $this->getPageContent($metadata['previous_report_page']);
220            if ($previousText === false) {
221                $previousText = '';
222            }
223        }
224
225        // Extract dates for placeholders
226        $currentDate = $this->getPageDate();
227        $previousDate = !empty($metadata['previous_report_page']) ?
228                        $this->getPageDate($metadata['previous_report_page']) :
229                        '';
230
231        $think = $this->think ? '/think' : '/no_think';
232        $prompt = $this->loadPrompt('compare', [
233            'text' => $text,
234            'previous_text' => $previousText,
235            'current_date' => $currentDate,
236            'previous_date' => $previousDate,
237            'think' => $think
238        ]);
239
240        return $this->callAPI('compare', $prompt, $metadata, $useContext);
241    }
242
243    /**
244     * Process text with a custom user prompt
245     *
246     * Sends a custom prompt to the LLM along with the provided text.
247     *
248     * @param string $text The text to process
249     * @param string $customPrompt The custom prompt to use
250     * @param array $metadata Optional metadata containing template and examples
251     * @param bool $useContext Whether to include template and examples in the context (default: true)
252     * @return string The processed text
253     */
254    public function processCustomPrompt($text, $metadata = [], $useContext = true)
255    {
256        // Store the current text for tool usage
257        $this->currentText = $text;
258
259        // Format the prompt with the text and custom prompt
260        $prompt = $metadata['prompt'] . "\n\nText to process:\n" . $text;
261
262        return $this->callAPI('custom', $prompt, $metadata, $useContext);
263    }
264
265    /**
266     * Get the list of available tools for the LLM
267     *
268     * Defines the tools that can be used by the LLM during processing.
269     *
270     * @return array List of tool definitions
271     */
272    private function getAvailableTools()
273    {
274        return [
275            [
276                'type' => 'function',
277                'function' => [
278                    'name' => 'get_document',
279                    'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
280                    'parameters' => [
281                        'type' => 'object',
282                        'properties' => [
283                            'id' => [
284                                'type' => 'string',
285                                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
286                            ]
287                        ],
288                        'required' => ['id']
289                    ]
290                ]
291            ],
292            [
293                'type' => 'function',
294                'function' => [
295                    'name' => 'get_template',
296                    'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
297                    'parameters' => [
298                        'type' => 'object',
299                        'properties' => [
300                            'language' => [
301                                'type' => 'string',
302                                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
303                                'default' => 'ro'
304                            ]
305                        ]
306                    ]
307                ]
308            ],
309            [
310                'type' => 'function',
311                'function' => [
312                    'name' => 'get_examples',
313                    'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
314                    'parameters' => [
315                        'type' => 'object',
316                        'properties' => [
317                            'count' => [
318                                'type' => 'integer',
319                                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
320                                'default' => 5
321                            ]
322                        ]
323                    ]
324                ]
325            ]
326        ];
327    }
328
329    /**
330     * Call the LLM API with the specified prompt
331     *
332     * Makes an HTTP POST request to the configured API endpoint with
333     * the prompt and other parameters. Handles authentication if an
334     * API key is configured.
335     *
336     * The method constructs a conversation with system and user messages,
337     * including context information from metadata when available.
338     *
339     * Complex logic includes:
340     * 1. Loading and enhancing the system prompt with metadata context
341     * 2. Building the API request with model parameters
342     * 3. Handling authentication with API key if configured
343     * 4. Making the HTTP request with proper error handling
344     * 5. Parsing and validating the API response
345     * 6. Supporting tool usage with automatic tool calling when enabled
346     * 7. Implementing context enhancement with templates, examples, and snippets
347     *
348     * The context information includes:
349     * - Template content: Used as a starting point for the response
350     * - Example pages: Full content of specified example pages
351     * - Text snippets: Relevant text examples from ChromaDB
352     *
353     * When tools are enabled, the method supports automatic tool calling:
354     * - Tools can retrieve documents, templates, and examples as needed
355     * - Tool responses are cached to avoid duplicate calls with identical parameters
356     * - Infinite loop protection prevents excessive tool calls
357     *
358     * @param string $command The command name for loading command-specific system prompts
359     * @param string $prompt The prompt to send to the LLM as user message
360     * @param array $metadata Optional metadata containing template, examples, and snippets
361     * @param bool $useContext Whether to include template and examples in the context (default: true)
362     * @return string The response content from the LLM
363     * @throws Exception If the API request fails or returns unexpected format
364     */
365
366    private function callAPI($command, $prompt, $metadata = [], $useContext = true)
367    {
368        // Load system prompt which provides general instructions to the LLM
369        $systemPrompt = $this->loadSystemPrompt($command, []);
370
371        // Enhance the prompt with context information from metadata
372        // This provides the LLM with additional context about templates and examples
373        if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
374            $contextInfo = "\n\n<context>\n";
375
376            // Add template content if specified in metadata
377            if (!empty($metadata['template'])) {
378                $templateContent = $this->getPageContent($metadata['template']);
379                if ($templateContent !== false) {
380                    $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
381                }
382            }
383
384            // Add example pages content if specified in metadata
385            if (!empty($metadata['examples'])) {
386                $examplesContent = [];
387                foreach ($metadata['examples'] as $example) {
388                    $content = $this->getPageContent($example);
389                    if ($content !== false) {
390                        $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
391                    }
392                }
393                if (!empty($examplesContent)) {
394                    $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
395                }
396            }
397
398            // Add text snippets if specified in metadata
399            if (!empty($metadata['snippets'])) {
400                $snippetsContent = [];
401                foreach ($metadata['snippets'] as $index => $snippet) {
402                    // These are text snippets from ChromaDB
403                    $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
404                }
405                if (!empty($snippetsContent)) {
406                    $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
407                }
408            }
409
410            $contextInfo .= "\n</context>\n";
411
412            // Append context information to system prompt
413            $prompt = $contextInfo . "\n\n" . $prompt;
414        }
415
416        // Check if tools should be used based on configuration
417        $useTools = $this->getConf('use_tools', false);
418
419        // Prepare API request data with model parameters
420        $data = [
421            'model' => $this->model,
422            'messages' => [
423                ['role' => 'system', 'content' => $systemPrompt],
424                ['role' => 'user', 'content' => $prompt]
425            ],
426            'max_tokens' => 6144,
427            'stream' => false,
428            'keep_alive' => '30m',
429            'think' => true
430        ];
431
432        // Add tools to the request only if useTools is true
433        if ($useTools) {
434            // Define available tools
435            $data['tools'] = $this->getAvailableTools();
436            $data['tool_choice'] = 'auto';
437            $data['parallel_tool_calls'] = false;
438        }
439
440        // Only add parameters if they are defined and not null
441        if ($this->temperature !== null) {
442            $data['temperature'] = $this->temperature;
443        }
444        if ($this->top_p !== null) {
445            $data['top_p'] = $this->top_p;
446        }
447        if ($this->top_k !== null) {
448            $data['top_k'] = $this->top_k;
449        }
450        if ($this->min_p !== null) {
451            $data['min_p'] = $this->min_p;
452        }
453
454        // Make an API call with tool responses
455        return $this->callAPIWithTools($data, false);
456    }
457
458    /**
459     * Handle tool calls from the LLM
460     *
461     * Processes tool calls made by the LLM and returns appropriate responses.
462     * Implements caching to avoid duplicate calls with identical parameters.
463     *
464     * @param array $toolCall The tool call data from the LLM
465     * @return array The tool response message
466     */
467    private function handleToolCall($toolCall)
468    {
469        $toolName = $toolCall['function']['name'];
470        $arguments = json_decode($toolCall['function']['arguments'], true);
471
472        // Create a cache key from the tool name and arguments
473        $cacheKey = md5($toolName . serialize($arguments));
474
475        // Check if we have a cached result for this tool call
476        if (isset($this->toolCallCache[$cacheKey])) {
477            // Return cached result and indicate it was found in cache
478            $toolResponse = $this->toolCallCache[$cacheKey];
479            // Update with current tool call ID
480            $toolResponse['tool_call_id'] = $toolCall['id'];
481            $toolResponse['cached'] = true; // Indicate this response was cached
482            return $toolResponse;
483        }
484
485        $toolResponse = [
486            'role' => 'tool',
487            'tool_call_id' => $toolCall['id'],
488            'cached' => false // Indicate this is a fresh response
489        ];
490
491        switch ($toolName) {
492            case 'get_document':
493                $documentId = $arguments['id'];
494                $content = $this->getPageContent($documentId);
495                if ($content === false) {
496                    $toolResponse['content'] = 'Document not found: ' . $documentId;
497                } else {
498                    $toolResponse['content'] = $content;
499                }
500                break;
501
502            case 'get_template':
503                // Get template content using the convenience function
504                $toolResponse['content'] = $this->getTemplateContent();
505                break;
506
507            case 'get_examples':
508                // Get examples content using the convenience function
509                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
510                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
511                break;
512
513            default:
514                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
515        }
516
517        // Cache the result for future calls with the same parameters
518        $cacheEntry = $toolResponse;
519        // Remove tool_call_id and cached flag from cache as they change per call
520        unset($cacheEntry['tool_call_id']);
521        unset($cacheEntry['cached']);
522        $this->toolCallCache[$cacheKey] = $cacheEntry;
523
524        return $toolResponse;
525    }
526
527    /**
528     * Make an API call with tool responses
529     *
530     * Sends a follow-up request to the LLM with tool responses.
531     * Implements complex logic for handling tool calls with caching and loop protection.
532     *
533     * Complex logic includes:
534     * 1. Making HTTP requests with proper authentication and error handling
535     * 2. Processing tool calls from the LLM response
536     * 3. Caching tool responses to avoid duplicate calls with identical parameters
537     * 4. Tracking tool call counts to prevent infinite loops
538     * 5. Implementing loop protection with call count limits
539     * 6. Handling recursive tool calls until final content is generated
540     *
541     * Loop protection works by:
542     * - Tracking individual tool call counts (max 3 per tool)
543     * - Tracking total tool calls (max 10 total)
544     * - Disabling tools when limits are exceeded to break potential loops
545     *
546     * @param array $data The API request data including messages with tool responses
547     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
548     * @param bool $useTools Whether to process tool calls (used for loop protection)
549     * @return string The final response content
550     */
551    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
552    {
553        // Set up HTTP headers, including authentication if API key is configured
554        $headers = [
555            'Content-Type: application/json'
556        ];
557
558        if (!empty($this->api_key)) {
559            $headers[] = 'Authorization: Bearer ' . $this->api_key;
560        }
561
562       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
563        if ($toolsCalled) {
564            unset($data['tools']);
565            unset($data['tool_choice']);
566        }
567
568        // Initialize and configure cURL for the API request
569        $ch = curl_init();
570        curl_setopt($ch, CURLOPT_URL, $this->api_url);
571        curl_setopt($ch, CURLOPT_POST, true);
572        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
573        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
574        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
575        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
576        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
577
578        // Execute the API request
579        $response = curl_exec($ch);
580        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
581        $error = curl_error($ch);
582        curl_close($ch);
583
584        // Handle cURL errors
585        if ($error) {
586            throw new Exception('API request failed: ' . $error);
587        }
588
589        // Handle HTTP errors
590        if ($httpCode !== 200) {
591            throw new Exception('API request failed with HTTP code: ' . $httpCode);
592        }
593
594        // Parse and validate the JSON response
595        $result = json_decode($response, true);
596
597        // Extract the content from the response if available
598        if (isset($result['choices'][0]['message']['content'])) {
599            $content = trim($result['choices'][0]['message']['content']);
600            // Reset tool call counts when we get final content
601            $this->toolCallCounts = [];
602            return $content;
603        }
604
605        // Handle tool calls if present
606        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
607            $toolCalls = $result['choices'][0]['message']['tool_calls'];
608            // Start with original messages
609            $messages = $data['messages'];
610            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
611            $assistantMessage = [];
612            foreach ($result['choices'][0]['message'] as $key => $value) {
613                if ($key !== 'content') {
614                    $assistantMessage[$key] = $value;
615                }
616            }
617            // Add assistant's message with tool calls
618            $messages[] = $assistantMessage;
619
620            // Process each tool call and track counts to prevent infinite loops
621            foreach ($toolCalls as $toolCall) {
622                $toolName = $toolCall['function']['name'];
623                // Increment tool call count
624                if (!isset($this->toolCallCounts[$toolName])) {
625                    $this->toolCallCounts[$toolName] = 0;
626                }
627                $this->toolCallCounts[$toolName]++;
628
629                $toolResponse = $this->handleToolCall($toolCall);
630                $messages[] = $toolResponse;
631            }
632
633            // Check if any tool has been called more than 3 times
634            $toolsCalledCount = 0;
635            foreach ($this->toolCallCounts as $count) {
636                if ($count > 3) {
637                    // If any tool called more than 3 times, disable tools to break loop
638                    $toolsCalled = true;
639                    break;
640                }
641                $toolsCalledCount += $count;
642            }
643
644            // If total tool calls exceed 10, also disable tools
645            if ($toolsCalledCount > 10) {
646                $toolsCalled = true;
647            }
648
649            // Make another API call with tool responses
650            $data['messages'] = $messages;
651            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
652        }
653
654        // Throw exception for unexpected response format
655        throw new Exception('Unexpected API response format');
656    }
657
658    /**
659     * Load a prompt template from a DokuWiki page and replace placeholders
660     *
661     * Loads prompt templates from DokuWiki pages with IDs in the format
662     * dokullm:prompts:LANGUAGE:PROMPT_NAME
663     *
664     * The method implements a language fallback mechanism:
665     * 1. First tries to load the prompt in the configured language
666     * 2. If not found, falls back to English prompts
667     * 3. Throws an exception if neither is available
668     *
669     * After loading the prompt, it scans for placeholders and automatically
670     * adds missing ones with appropriate values before replacing all placeholders.
671     *
672     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
673     * @param array $variables Associative array of placeholder => value pairs
674     * @return string The processed prompt with placeholders replaced
675     * @throws Exception If the prompt page cannot be loaded in any language
676     */
677    private function loadPrompt($promptName, $variables = [])
678    {
679        $language = $this->getConf('language');
680
681        // Default to 'en' if language is 'default' or not set
682        if ($language === 'default' || empty($language)) {
683            $language = 'en';
684        }
685
686        // Construct the page ID for the prompt in the configured language
687        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
688
689        // Try to get the content of the prompt page in the configured language
690        $prompt = $this->getPageContent($promptPageId);
691
692        // If the language-specific prompt doesn't exist, try English as fallback
693        if ($prompt === false && $language !== 'en') {
694            $promptPageId = 'dokullm:prompts:en:' . $promptName;
695            $prompt = $this->getPageContent($promptPageId);
696        }
697
698        // If still no prompt found, throw an exception
699        if ($prompt === false) {
700            throw new Exception('Prompt page not found: ' . $promptPageId);
701        }
702
703        // Find placeholders in the prompt
704        $placeholders = $this->findPlaceholders($prompt);
705
706        // Add missing placeholders with appropriate values
707        foreach ($placeholders as $placeholder) {
708            // Skip if already provided in variables
709            if (isset($variables[$placeholder])) {
710                continue;
711            }
712
713            // Add appropriate values for specific placeholders
714            switch ($placeholder) {
715                case 'template':
716                    // If we have a page_template in variables, use it
717                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
718                    break;
719
720                case 'snippets':
721                    $variables[$placeholder] = $this->getSnippets(10);
722                    break;
723
724                case 'examples':
725                    // If we have example page IDs in metadata, add examples content
726                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
727                    break;
728
729                case 'previous':
730                    // If we have a previous report page ID in metadata, add previous content
731                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
732
733                    // Add current and previous dates to metadata
734                    $variables['current_date'] = $this->getPageDate();
735                    $variables['previous_date'] = !empty($variables['page_previous']) ?
736                                                $this->getPageDate($variables['page_previous']) :
737                                                '';
738                    break;
739
740                default:
741                    // For other placeholders, leave them empty or set a default value
742                    $variables[$placeholder] = '';
743                    break;
744            }
745        }
746
747        // Replace placeholders with actual values
748        // Placeholders are in the format {placeholder_name}
749        foreach ($variables as $placeholder => $value) {
750            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
751        }
752
753        // Return the processed prompt
754        return $prompt;
755    }
756
757    /**
758     * Load system prompt with optional command-specific appendage
759     *
760     * Loads the main system prompt and appends any command-specific system prompt
761     * if available.
762     *
763     * @param string $action The action/command name
764     * @param array $variables Associative array of placeholder => value pairs
765     * @return string The combined system prompt
766     */
767    private function loadSystemPrompt($action, $variables = [])
768    {
769        // Load system prompt which provides general instructions to the LLM
770        $systemPrompt = $this->loadPrompt('system', $variables);
771
772        // Check if there's a command-specific system prompt appendage
773        if (!empty($action)) {
774            try {
775                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
776                if ($commandSystemPrompt !== false) {
777                    $systemPrompt .= "\n" . $commandSystemPrompt;
778                }
779            } catch (Exception $e) {
780                // Ignore exceptions when loading command-specific system prompt
781                // This allows the main system prompt to still be used
782            }
783        }
784
785        return $systemPrompt;
786    }
787
788    /**
789     * Get the content of a DokuWiki page
790     *
791     * Retrieves the raw content of a DokuWiki page by its ID.
792     * Used for loading template and example page content for context.
793     *
794     * @param string $pageId The page ID to retrieve
795     * @return string|false The page content or false if not found/readable
796     */
797    public function getPageContent($pageId)
798    {
799        // Convert page ID to file path
800        $pageFile = wikiFN($pageId);
801
802        // Check if file exists and is readable
803        if (file_exists($pageFile) && is_readable($pageFile)) {
804            return file_get_contents($pageFile);
805        }
806
807        return false;
808    }
809
810    /**
811     * Extract date from page ID or file timestamp
812     *
813     * Attempts to extract a date in YYmmdd format from the page ID.
814     * If not found, uses the file's last modification timestamp.
815     *
816     * @param string $pageId Optional page ID to extract date from (defaults to current page)
817     * @return string Formatted date string (YYYY-MM-DD)
818     */
819    private function getPageDate($pageId = null)
820    {
821        global $ID;
822
823        // Use provided page ID or current page ID
824        $targetPageId = $pageId ?: $ID;
825
826        // Try to extract date from page ID (looking for YYmmdd pattern)
827        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
828            // Convert YYmmdd to YYYY-MM-DD
829            $year = $matches[1];
830            $month = $matches[2];
831            $day = $matches[3];
832
833            // Assume 20xx for years 00-69, 19xx for years 70-99
834            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
835
836            return $fullYear . '-' . $month . '-' . $day;
837        }
838
839        // Fallback to file timestamp
840        $pageFile = wikiFN($targetPageId);
841        if (file_exists($pageFile)) {
842            $timestamp = filemtime($pageFile);
843            return date('Y-m-d', $timestamp);
844        }
845
846        // Return empty string if no date can be determined
847        return '';
848    }
849
850    /**
851     * Get current text
852     *
853     * Retrieves the current text stored from the process function.
854     *
855     * @return string The current text
856     */
857    private function getCurrentText()
858    {
859        return $this->currentText;
860    }
861
862    /**
863     * Scan text for placeholders
864     *
865     * Finds all placeholders in the format {placeholder_name} in the provided text
866     * and returns an array of unique placeholder names.
867     *
868     * @param string $text The text to scan for placeholders
869     * @return array List of unique placeholder names found in the text
870     */
871    public function findPlaceholders($text)
872    {
873        $placeholders = [];
874        $pattern = '/\{([^}]+)\}/';
875
876        if (preg_match_all($pattern, $text, $matches)) {
877            // Get unique placeholder names
878            $placeholders = array_unique($matches[1]);
879        }
880
881        return $placeholders;
882    }
883
884    /**
885     * Get template content for the current text
886     *
887     * Convenience function to retrieve template content. If a pageId is provided,
888     * retrieves content directly from that page. Otherwise, queries ChromaDB for
889     * a relevant template based on the current text.
890     *
891     * @param string|null $pageId Optional page ID to retrieve template from directly
892     * @return string The template content or empty string if not found
893     */
894    private function getTemplateContent($pageId = null)
895    {
896        // If pageId is provided, use it directly
897        if ($pageId !== null) {
898            $templateContent = $this->getPageContent($pageId);
899            if ($templateContent !== false) {
900                return $templateContent;
901            }
902        }
903
904        // Otherwise, get template suggestion for the current text
905        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
906        if (!empty($pageId)) {
907            $templateContent = $this->getPageContent($pageId[0]);
908            if ($templateContent !== false) {
909                return $templateContent;
910            }
911        }
912        return '( no template )';
913    }
914
915    /**
916     * Get snippets content for the current text
917     *
918     * Convenience function to retrieve relevant snippets for the current text.
919     * Queries ChromaDB for relevant snippets and returns them formatted.
920     *
921     * @param int $count Number of snippets to retrieve (default: 10)
922     * @return string Formatted snippets content or empty string if not found
923     */
924    private function getSnippets($count = 10)
925    {
926        // Get example snippets for the current text
927        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
928        if (!empty($snippets)) {
929            $formattedSnippets = [];
930            foreach ($snippets as $index => $snippet) {
931                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
932            }
933            return implode("\n", $formattedSnippets);
934        }
935        return '( no examples )';
936    }
937
938    /**
939     * Get examples content from example page IDs
940     *
941     * Convenience function to retrieve content from example pages.
942     * Returns the content of each page packed in XML elements.
943     *
944     * @param array $exampleIds List of example page IDs
945     * @return string Formatted examples content or empty string if not found
946     */
947    private function getExamplesContent($exampleIds = [])
948    {
949        if (empty($exampleIds) || !is_array($exampleIds)) {
950            return '( no examples )';
951        }
952
953        $examplesContent = [];
954        foreach ($exampleIds as $index => $exampleId) {
955            $content = $this->getPageContent($exampleId);
956            if ($content !== false) {
957                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
958            }
959        }
960
961        return implode("\n", $examplesContent);
962    }
963
964    /**
965     * Get previous report content from previous page ID
966     *
967     * Convenience function to retrieve content from a previous report page.
968     * Returns the content of the previous page or a default message if not found.
969     *
970     * @param string $previousId Previous page ID
971     * @return string Previous report content or default message if not found
972     */
973    private function getPreviousContent($previousId = '')
974    {
975        if (empty($previousId)) {
976            return '( no previous report )';
977        }
978
979        $content = $this->getPageContent($previousId);
980        if ($content !== false) {
981            return $content;
982        }
983
984        return '( previous report not found )';
985    }
986
987    /**
988     * Get ChromaDB client with configuration
989     *
990     * Creates and returns a ChromaDB client with the appropriate configuration.
991     * Extracts modality from the current page ID to use as the collection name.
992     *
993     * @return array Array containing the ChromaDB client and collection name
994     */
995    private function getChromaDBClient()
996    {
997        // Get ChromaDB configuration from DokuWiki plugin configuration
998        $chromaHost = $this->getConf('chroma_host', 'localhost');
999        $chromaPort = $this->getConf('chroma_port', 8000);
1000        $chromaTenant = $this->getConf('chroma_tenant', 'dokullm');
1001        $chromaDatabase = $this->getConf('chroma_database', 'dokullm');
1002        $chromaDefaultCollection = $this->getConf('chroma_collection', 'documents');
1003        $ollamaHost = $this->getConf('ollama_host', 'localhost');
1004        $ollamaPort = $this->getConf('ollama_port', 11434);
1005        $ollamaModel = $this->getConf('ollama_embeddings_model', 'nomic-embed-text');
1006
1007        // Use the first part of the current page ID as collection name, fallback to default
1008        global $ID;
1009        $chromaCollection = $chromaDefaultCollection; // Default collection name
1010
1011        if (!empty($ID)) {
1012            // Split the page ID by ':' and take the first part as collection name
1013            $parts = explode(':', $ID);
1014            if (isset($parts[0]) && !empty($parts[0])) {
1015                // If the first part is 'playground', use the default collection
1016                // Otherwise, use the first part as the collection name
1017                if ($parts[0] === 'playground') {
1018                    $chromaCollection = $chromaDefaultCollection;
1019                } else {
1020                    $chromaCollection = $parts[0];
1021                }
1022            }
1023        }
1024
1025        // Create ChromaDB client with all required parameters
1026        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
1027            $chromaHost,
1028            $chromaPort,
1029            $chromaTenant,
1030            $chromaDatabase,
1031            $ollamaHost,
1032            $ollamaPort,
1033            $ollamaModel
1034        );
1035
1036
1037        return [$chromaClient, $chromaCollection];
1038    }
1039
1040    /**
1041     * Query ChromaDB for relevant documents
1042     *
1043     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1044     * Extracts modality from the current page ID to use as the collection name.
1045     *
1046     * @param string $text The text to find similar documents for
1047     * @param int $limit Maximum number of documents to retrieve (default: 5)
1048     * @param array|null $where Optional filter conditions for metadata
1049     * @return array List of document IDs
1050     */
1051    private function queryChromaDB($text, $limit = 5, $where = null)
1052    {
1053        try {
1054            // Get ChromaDB client and collection name
1055            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1056            // Query for similar documents
1057            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1058
1059            // Extract document IDs from results
1060            $documentIds = [];
1061            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
1062                foreach ($results['ids'][0] as $id) {
1063                    // Use the ChromaDB ID directly without conversion
1064                    $documentIds[] = $id;
1065                }
1066            }
1067
1068            return $documentIds;
1069        } catch (Exception $e) {
1070            // Log error but don't fail the operation
1071            error_log('ChromaDB query failed: ' . $e->getMessage());
1072            return [];
1073        }
1074    }
1075
1076    /**
1077     * Query ChromaDB for relevant documents and return text snippets
1078     *
1079     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1080     * Returns the actual text snippets instead of document IDs.
1081     *
1082     * @param string $text The text to find similar documents for
1083     * @param int $limit Maximum number of documents to retrieve (default: 10)
1084     * @param array|null $where Optional filter conditions for metadata
1085     * @return array List of text snippets
1086     */
1087    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
1088    {
1089        try {
1090            // Get ChromaDB client and collection name
1091            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1092            // Query for similar documents
1093            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1094
1095            // Extract document texts from results
1096            $snippets = [];
1097            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
1098                foreach ($results['documents'][0] as $document) {
1099                    $snippets[] = $document;
1100                }
1101            }
1102
1103            return $snippets;
1104        } catch (Exception $e) {
1105            // Log error but don't fail the operation
1106            error_log('ChromaDB query failed: ' . $e->getMessage());
1107            return [];
1108        }
1109    }
1110
1111    /**
1112     * Query ChromaDB for a template document
1113     *
1114     * Generates embeddings for the input text and queries ChromaDB for a template document
1115     * by filtering with metadata 'template=true'.
1116     *
1117     * @param string $text The text to find a template for
1118     * @return array List of template document IDs (maximum 1)
1119     */
1120    public function queryChromaDBTemplate($text)
1121    {
1122        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1123
1124        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1125        if (!empty($templateIds)) {
1126            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1127        }
1128
1129        return $templateIds;
1130    }
1131
1132}
1133