xref: /plugin/dokullm/LlmClient.php (revision 590368144294a28ecf0e0e39feb976bf79fefb1e)
1*59036814SCostin Stroie<?php
2*59036814SCostin Stroienamespace dokuwiki\plugin\dokullm;
3*59036814SCostin Stroie
4*59036814SCostin Stroie/**
5*59036814SCostin Stroie * LLM Client for the dokullm plugin
6*59036814SCostin Stroie *
7*59036814SCostin Stroie * This class provides methods to interact with an LLM API for various
8*59036814SCostin Stroie * text processing tasks such as completion, rewriting, grammar correction,
9*59036814SCostin Stroie * summarization, conclusion creation, text analysis, and custom prompts.
10*59036814SCostin Stroie *
11*59036814SCostin Stroie * The client handles:
12*59036814SCostin Stroie * - API configuration and authentication
13*59036814SCostin Stroie * - Prompt template loading and processing
14*59036814SCostin Stroie * - Context-aware requests with metadata
15*59036814SCostin Stroie * - DokuWiki page content retrieval
16*59036814SCostin Stroie */
17*59036814SCostin Stroie
// Bail out immediately when this file is not running inside DokuWiki
defined('DOKU_INC') || die();
22*59036814SCostin Stroie
23*59036814SCostin Stroie
24*59036814SCostin Stroie/**
25*59036814SCostin Stroie * LLM Client class for handling API communications
26*59036814SCostin Stroie *
27*59036814SCostin Stroie * Manages configuration settings and provides methods for various
28*59036814SCostin Stroie * text processing operations through an LLM API.
29*59036814SCostin Stroie * Implements caching for tool calls to avoid duplicate processing.
30*59036814SCostin Stroie */
31*59036814SCostin Stroieclass LlmClient
32*59036814SCostin Stroie{
/**
 * URL of the LLM API endpoint
 *
 * @var string
 */
private $api_url;

/**
 * Results of earlier tool calls, kept so identical calls can be answered from cache
 *
 * @var array
 */
private $toolCallCache = [];

/**
 * Text currently being processed, stored for tool usage
 *
 * @var string
 */
private $currentText = '';

/**
 * Tool call counters, tracked to prevent infinite loops
 *
 * @var array
 */
private $toolCallCounts = [];

/**
 * Key used to authenticate against the API
 *
 * @var string
 */
private $api_key;

/**
 * Identifier of the model to use
 *
 * @var string
 */
private $model;

/**
 * Request timeout, in seconds
 *
 * @var int
 */
private $timeout;

/**
 * Temperature setting controlling response randomness
 *
 * @var float
 */
private $temperature;

/**
 * Top-p setting for nucleus sampling
 *
 * @var float
 */
private $top_p;

/**
 * Top-k setting for token selection
 *
 * @var int
 */
private $top_k;

/**
 * Min-p setting (minimum probability threshold)
 *
 * @var float
 */
private $min_p;

/**
 * Whether thinking is enabled in the LLM responses
 *
 * @var bool
 */
private $think;
68*59036814SCostin Stroie
/**
 * Initialize the LLM client from the plugin configuration
 *
 * Reads the dokullm section of DokuWiki's global $conf array. A setting
 * that is not configured is stored as null (false for 'think') instead of
 * raising an "undefined array key" warning; callAPI() only forwards the
 * sampling parameters that are not null.
 *
 * Configuration values read:
 * - api_url: The LLM API endpoint URL
 * - api_key: Authentication key for the API (optional)
 * - model: The model identifier to use for requests
 * - timeout: Request timeout in seconds
 * - temperature: Temperature setting for response randomness (0.0-1.0)
 * - top_p: Top-p (nucleus sampling) setting (0.0-1.0)
 * - top_k: Top-k setting (integer >= 1)
 * - min_p: Minimum probability threshold (0.0-1.0)
 * - think: Whether to enable thinking in LLM responses (boolean)
 */
public function __construct()
{
    global $conf;

    // Tolerate a missing plugin section as well as missing individual keys
    $settings = $conf['plugin']['dokullm'] ?? [];

    $this->api_url = $settings['api_url'] ?? null;
    $this->api_key = $settings['api_key'] ?? null;
    $this->model = $settings['model'] ?? null;
    $this->timeout = $settings['timeout'] ?? null;

    // Sampling parameters stay null when unset so callAPI() can leave them out
    $this->temperature = $settings['temperature'] ?? null;
    $this->top_p = $settings['top_p'] ?? null;
    $this->top_k = $settings['top_k'] ?? null;
    $this->min_p = $settings['min_p'] ?? null;

    $this->think = $settings['think'] ?? false;
}
100*59036814SCostin Stroie
101*59036814SCostin Stroie
102*59036814SCostin Stroie
/**
 * Run a named action on a text through the LLM
 *
 * Stores the text for tool usage, completes the metadata with the text,
 * the think switch ('/think' or '/no_think') and the action name, renames
 * the keys 'template', 'examples' and 'previous' to 'page_template',
 * 'page_examples' and 'page_previous', then loads the prompt for the
 * action and sends it to the API.
 *
 * NOTE(review): once 'template' and 'examples' have been renamed, the
 * context check in callAPI() (which tests 'template', 'examples' and
 * 'snippets') no longer sees them - confirm this is intended.
 *
 * @param string $action The action name, used to load the prompt and passed on to callAPI()
 * @param string $text The text to process
 * @param array $metadata Optional metadata; may contain template, examples and previous
 * @param bool $useContext Forwarded to callAPI() (default: true)
 * @return string The response returned by callAPI()
 */
public function process($action, $text, $metadata = [], $useContext = true)
{
    // Keep the text around for tool usage
    $this->currentText = $text;

    $metadata['text'] = $text;
    $metadata['think'] = $this->think ? '/think' : '/no_think';
    $metadata['action'] = $action;

    // Old key => new key; a key is only moved when it is set
    $renames = [
        'template' => 'page_template',
        'examples' => 'page_examples',
        'previous' => 'page_previous',
    ];
    foreach ($renames as $from => $to) {
        if (!isset($metadata[$from])) {
            continue;
        }
        $metadata[$to] = $metadata[$from];
        unset($metadata[$from]);
    }

    $userPrompt = $this->loadPrompt($action, $metadata);

    return $this->callAPI($action, $userPrompt, $metadata, $useContext);
}
135*59036814SCostin Stroie
136*59036814SCostin Stroie
137*59036814SCostin Stroie
/**
 * Create a report from the provided text using the LLM
 *
 * When tools are disabled in the configuration ('use_tools'), the context
 * is prepared up front: a template is looked up through
 * queryChromaDBTemplate() if the metadata does not already name one, and
 * the results of queryChromaDBSnippets($text, 10) are appended to any
 * snippets already present in the metadata. When tools are enabled both
 * lookups are skipped, since the LLM can call get_template and
 * get_examples itself.
 *
 * @param string $text The text to create the report from
 * @param array $metadata Optional metadata containing template, examples, and snippets
 * @param bool $useContext Whether to include template and examples in the context (default: true)
 * @return string The created text
 */
public function createReport($text, $metadata = [], $useContext = true)
{
    global $conf;

    // Keep the text around for tool usage
    $this->currentText = $text;

    $toolsEnabled = $conf['plugin']['dokullm']['use_tools'] ?? false;

    if (!$toolsEnabled) {
        // No template given: take the first ChromaDB match, if there is one
        if (empty($metadata['template'])) {
            $candidates = $this->queryChromaDBTemplate($text);
            if (!empty($candidates)) {
                $metadata['template'] = $candidates[0];
            }
        }

        // Related documents from ChromaDB go after the snippets already supplied
        $found = $this->queryChromaDBSnippets($text, 10);
        if (!empty($found)) {
            $metadata['snippets'] = array_merge($metadata['snippets'] ?? [], $found);
        }
    }

    $placeholders = [
        'text' => $text,
        'think' => $this->think ? '/think' : '/no_think',
    ];
    $userPrompt = $this->loadPrompt('create', $placeholders);

    return $this->callAPI('create', $userPrompt, $metadata, $useContext);
}
189*59036814SCostin Stroie
/**
 * Compare the current text with a previous report
 *
 * Builds the 'compare' prompt from the current text, the content of the
 * page named in $metadata['previous_report_page'] (empty when no page is
 * given or its content cannot be loaded) and the dates of both pages,
 * then sends it to the LLM.
 *
 * @param string $text The current text to compare
 * @param array $metadata Optional metadata containing template, examples, and previous report reference
 * @param bool $useContext Forwarded to callAPI() (default: false)
 * @return string The comparison results
 */
public function compareText($text, $metadata = [], $useContext = false)
{
    // Keep the text around for tool usage
    $this->currentText = $text;

    $hasPrevious = !empty($metadata['previous_report_page']);

    // Content of the previous report; stays empty when unavailable
    $previousText = '';
    if ($hasPrevious) {
        $loaded = $this->getPageContent($metadata['previous_report_page']);
        if ($loaded !== false) {
            $previousText = $loaded;
        }
    }

    // Dates used as prompt placeholders
    $currentDate = $this->getPageDate();
    $previousDate = '';
    if ($hasPrevious) {
        $previousDate = $this->getPageDate($metadata['previous_report_page']);
    }

    $placeholders = [
        'text' => $text,
        'previous_text' => $previousText,
        'current_date' => $currentDate,
        'previous_date' => $previousDate,
        'think' => $this->think ? '/think' : '/no_think',
    ];
    $userPrompt = $this->loadPrompt('compare', $placeholders);

    return $this->callAPI('compare', $userPrompt, $metadata, $useContext);
}
231*59036814SCostin Stroie
/**
 * Process text with a custom user prompt
 *
 * Sends the custom prompt found in $metadata['prompt'], followed by the
 * provided text, to the LLM. A missing prompt is treated as an empty
 * string, so only the "Text to process" part is sent.
 *
 * @param string $text The text to process
 * @param array $metadata Optional metadata; 'prompt' holds the custom prompt, other keys
 *                        (template, examples, ...) are passed on to callAPI()
 * @param bool $useContext Whether to include template and examples in the context (default: true)
 * @return string The processed text
 */
public function processCustomPrompt($text, $metadata = [], $useContext = true)
{
    // Store the current text for tool usage
    $this->currentText = $text;

    // Fall back to an empty prompt instead of reading an undefined array key
    $customPrompt = $metadata['prompt'] ?? '';

    // Format the prompt with the custom prompt followed by the text
    $prompt = $customPrompt . "\n\nText to process:\n" . $text;

    return $this->callAPI('custom', $prompt, $metadata, $useContext);
}
253*59036814SCostin Stroie
/**
 * Get the list of available tools for the LLM
 *
 * Returns three function-type tool definitions: get_document (requires
 * an 'id'), get_template (optional 'language', default 'ro') and
 * get_examples (optional 'count', default 5).
 *
 * @return array List of tool definitions
 */
private function getAvailableTools()
{
    // Wraps one function description into a tool definition; the
    // 'required' entry is only emitted when a parameter is mandatory.
    $defineTool = static function ($name, $description, array $properties, array $required = []) {
        $parameters = [
            'type' => 'object',
            'properties' => $properties,
        ];
        if ($required !== []) {
            $parameters['required'] = $required;
        }

        return [
            'type' => 'function',
            'function' => [
                'name' => $name,
                'description' => $description,
                'parameters' => $parameters,
            ],
        ];
    };

    $getDocument = $defineTool(
        'get_document',
        'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
        [
            'id' => [
                'type' => 'string',
                'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.',
            ],
        ],
        ['id']
    );

    $getTemplate = $defineTool(
        'get_template',
        'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
        [
            'language' => [
                'type' => 'string',
                'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
                'default' => 'ro',
            ],
        ]
    );

    $getExamples = $defineTool(
        'get_examples',
        'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
        [
            'count' => [
                'type' => 'integer',
                'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                'default' => 5,
            ],
        ]
    );

    return [$getDocument, $getTemplate, $getExamples];
}
317*59036814SCostin Stroie
/**
 * Call the LLM API with the specified prompt
 *
 * Builds the chat request (system message + user message) and hands it
 * to callAPIWithTools(), which performs the actual API call.
 *
 * Steps:
 * 1. Load the command-specific system prompt
 * 2. When $useContext is true and the metadata names a template, examples
 *    or snippets, build a <context> block and PREPEND it to the user
 *    prompt (the system prompt is left untouched)
 * 3. Assemble the request data with the model and fixed request options
 * 4. Attach the tool definitions when 'use_tools' is enabled in the
 *    plugin configuration
 * 5. Add the sampling parameters that are configured (not null)
 *
 * The context block may contain:
 * - Template content: content of the page named in $metadata['template']
 * - Example pages: full content of the pages listed in $metadata['examples']
 * - Text snippets: the strings in $metadata['snippets'], numbered from 1
 *
 * 'examples' and 'snippets' may each be a list or a single value; a single
 * value is treated as a one-element list. Snippets are numbered by their
 * position, independent of the array keys.
 *
 * @param string $command The command name for loading command-specific system prompts
 * @param string $prompt The prompt to send to the LLM as user message
 * @param array $metadata Optional metadata containing template, examples, and snippets
 * @param bool $useContext Whether to include template and examples in the context (default: true)
 * @return string The response content from the LLM, as returned by callAPIWithTools()
 * @throws Exception Not raised here; presumably propagated from callAPIWithTools() - verify
 */
private function callAPI($command, $prompt, $metadata = [], $useContext = true)
{
    global $conf;

    // Load system prompt which provides general instructions to the LLM
    $systemPrompt = $this->loadSystemPrompt($command, []);

    $hasContext = !empty($metadata['template'])
        || !empty($metadata['examples'])
        || !empty($metadata['snippets']);

    if ($useContext && $hasContext) {
        $contextInfo = "\n\n<context>\n";

        // Template content, when the template page can be loaded
        if (!empty($metadata['template'])) {
            $templateContent = $this->getPageContent($metadata['template']);
            if ($templateContent !== false) {
                $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
            }
        }

        // Full content of the example pages; pages that cannot be loaded are skipped
        if (!empty($metadata['examples'])) {
            // Accept a single page ID as well as a list of page IDs
            $examplePages = is_iterable($metadata['examples']) ? $metadata['examples'] : [$metadata['examples']];
            $examplesContent = [];
            foreach ($examplePages as $example) {
                $content = $this->getPageContent($example);
                if ($content !== false) {
                    $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                }
            }
            if (!empty($examplesContent)) {
                $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
            }
        }

        // Text snippets (e.g. ChromaDB results merged in by createReport())
        if (!empty($metadata['snippets'])) {
            $snippets = is_iterable($metadata['snippets']) ? $metadata['snippets'] : [$metadata['snippets']];
            $snippetsContent = [];
            // Number by position so string or sparse keys cannot break the id
            $position = 0;
            foreach ($snippets as $snippet) {
                $position++;
                $snippetsContent[] = "\n<example id=\"" . $position . "\">\n" . $snippet . "\n</example>\n";
            }
            if (!empty($snippetsContent)) {
                $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
            }
        }

        $contextInfo .= "\n</context>\n";

        // The context goes in front of the user prompt
        $prompt = $contextInfo . "\n\n" . $prompt;
    }

    // Check if tools should be used based on configuration
    $useTools = $conf['plugin']['dokullm']['use_tools'] ?? false;

    // Prepare API request data with model parameters
    $data = [
        'model' => $this->model,
        'messages' => [
            ['role' => 'system', 'content' => $systemPrompt],
            ['role' => 'user', 'content' => $prompt]
        ],
        'max_tokens' => 6144,
        'stream' => false,
        'keep_alive' => '30m',
        // NOTE(review): always true, regardless of the 'think' setting held in
        // $this->think - confirm whether this should follow the configuration
        'think' => true
    ];

    // Add tools to the request only if useTools is true
    if ($useTools) {
        $data['tools'] = $this->getAvailableTools();
        $data['tool_choice'] = 'auto';
        $data['parallel_tool_calls'] = false;
    }

    // Only send the sampling parameters that are actually configured
    $sampling = [
        'temperature' => $this->temperature,
        'top_p' => $this->top_p,
        'top_k' => $this->top_k,
        'min_p' => $this->min_p,
    ];
    foreach ($sampling as $name => $value) {
        if ($value !== null) {
            $data[$name] = $value;
        }
    }

    return $this->callAPIWithTools($data, false);
}
447*59036814SCostin Stroie
448*59036814SCostin Stroie    /**
449*59036814SCostin Stroie     * Handle tool calls from the LLM
450*59036814SCostin Stroie     *
451*59036814SCostin Stroie     * Processes tool calls made by the LLM and returns appropriate responses.
452*59036814SCostin Stroie     * Implements caching to avoid duplicate calls with identical parameters.
453*59036814SCostin Stroie     *
454*59036814SCostin Stroie     * @param array $toolCall The tool call data from the LLM
455*59036814SCostin Stroie     * @return array The tool response message
456*59036814SCostin Stroie     */
457*59036814SCostin Stroie    private function handleToolCall($toolCall)
458*59036814SCostin Stroie    {
459*59036814SCostin Stroie        $toolName = $toolCall['function']['name'];
460*59036814SCostin Stroie        $arguments = json_decode($toolCall['function']['arguments'], true);
461*59036814SCostin Stroie
462*59036814SCostin Stroie        // Create a cache key from the tool name and arguments
463*59036814SCostin Stroie        $cacheKey = md5($toolName . serialize($arguments));
464*59036814SCostin Stroie
465*59036814SCostin Stroie        // Check if we have a cached result for this tool call
466*59036814SCostin Stroie        if (isset($this->toolCallCache[$cacheKey])) {
467*59036814SCostin Stroie            // Return cached result and indicate it was found in cache
468*59036814SCostin Stroie            $toolResponse = $this->toolCallCache[$cacheKey];
469*59036814SCostin Stroie            // Update with current tool call ID
470*59036814SCostin Stroie            $toolResponse['tool_call_id'] = $toolCall['id'];
471*59036814SCostin Stroie            $toolResponse['cached'] = true; // Indicate this response was cached
472*59036814SCostin Stroie            return $toolResponse;
473*59036814SCostin Stroie        }
474*59036814SCostin Stroie
475*59036814SCostin Stroie        $toolResponse = [
476*59036814SCostin Stroie            'role' => 'tool',
477*59036814SCostin Stroie            'tool_call_id' => $toolCall['id'],
478*59036814SCostin Stroie            'cached' => false // Indicate this is a fresh response
479*59036814SCostin Stroie        ];
480*59036814SCostin Stroie
481*59036814SCostin Stroie        switch ($toolName) {
482*59036814SCostin Stroie            case 'get_document':
483*59036814SCostin Stroie                $documentId = $arguments['id'];
484*59036814SCostin Stroie                $content = $this->getPageContent($documentId);
485*59036814SCostin Stroie                if ($content === false) {
486*59036814SCostin Stroie                    $toolResponse['content'] = 'Document not found: ' . $documentId;
487*59036814SCostin Stroie                } else {
488*59036814SCostin Stroie                    $toolResponse['content'] = $content;
489*59036814SCostin Stroie                }
490*59036814SCostin Stroie                break;
491*59036814SCostin Stroie
492*59036814SCostin Stroie            case 'get_template':
493*59036814SCostin Stroie                // Get template content using the convenience function
494*59036814SCostin Stroie                $toolResponse['content'] = $this->getTemplateContent();
495*59036814SCostin Stroie                break;
496*59036814SCostin Stroie
497*59036814SCostin Stroie            case 'get_examples':
498*59036814SCostin Stroie                // Get examples content using the convenience function
499*59036814SCostin Stroie                $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
500*59036814SCostin Stroie                $toolResponse['content'] = '<examples>\n' . $this->getSnippets($count) . '\n</examples>';
501*59036814SCostin Stroie                break;
502*59036814SCostin Stroie
503*59036814SCostin Stroie            default:
504*59036814SCostin Stroie                $toolResponse['content'] = 'Unknown tool: ' . $toolName;
505*59036814SCostin Stroie        }
506*59036814SCostin Stroie
507*59036814SCostin Stroie        // Cache the result for future calls with the same parameters
508*59036814SCostin Stroie        $cacheEntry = $toolResponse;
509*59036814SCostin Stroie        // Remove tool_call_id and cached flag from cache as they change per call
510*59036814SCostin Stroie        unset($cacheEntry['tool_call_id']);
511*59036814SCostin Stroie        unset($cacheEntry['cached']);
512*59036814SCostin Stroie        $this->toolCallCache[$cacheKey] = $cacheEntry;
513*59036814SCostin Stroie
514*59036814SCostin Stroie        return $toolResponse;
515*59036814SCostin Stroie    }
516*59036814SCostin Stroie
517*59036814SCostin Stroie    /**
518*59036814SCostin Stroie     * Make an API call with tool responses
519*59036814SCostin Stroie     *
520*59036814SCostin Stroie     * Sends a follow-up request to the LLM with tool responses.
521*59036814SCostin Stroie     * Implements complex logic for handling tool calls with caching and loop protection.
522*59036814SCostin Stroie     *
523*59036814SCostin Stroie     * Complex logic includes:
524*59036814SCostin Stroie     * 1. Making HTTP requests with proper authentication and error handling
525*59036814SCostin Stroie     * 2. Processing tool calls from the LLM response
526*59036814SCostin Stroie     * 3. Caching tool responses to avoid duplicate calls with identical parameters
527*59036814SCostin Stroie     * 4. Tracking tool call counts to prevent infinite loops
528*59036814SCostin Stroie     * 5. Implementing loop protection with call count limits
529*59036814SCostin Stroie     * 6. Handling recursive tool calls until final content is generated
530*59036814SCostin Stroie     *
531*59036814SCostin Stroie     * Loop protection works by:
532*59036814SCostin Stroie     * - Tracking individual tool call counts (max 3 per tool)
533*59036814SCostin Stroie     * - Tracking total tool calls (max 10 total)
534*59036814SCostin Stroie     * - Disabling tools when limits are exceeded to break potential loops
535*59036814SCostin Stroie     *
536*59036814SCostin Stroie     * @param array $data The API request data including messages with tool responses
537*59036814SCostin Stroie     * @param bool $toolsCalled Whether tools have already been called (used for loop protection)
538*59036814SCostin Stroie     * @param bool $useTools Whether to process tool calls (used for loop protection)
539*59036814SCostin Stroie     * @return string The final response content
540*59036814SCostin Stroie     */
541*59036814SCostin Stroie    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
542*59036814SCostin Stroie    {
543*59036814SCostin Stroie        // Set up HTTP headers, including authentication if API key is configured
544*59036814SCostin Stroie        $headers = [
545*59036814SCostin Stroie            'Content-Type: application/json'
546*59036814SCostin Stroie        ];
547*59036814SCostin Stroie
548*59036814SCostin Stroie        if (!empty($this->api_key)) {
549*59036814SCostin Stroie            $headers[] = 'Authorization: Bearer ' . $this->api_key;
550*59036814SCostin Stroie        }
551*59036814SCostin Stroie
552*59036814SCostin Stroie       // If tools have already been called, remove tools and tool_choice from data to prevent infinite loops
553*59036814SCostin Stroie        if ($toolsCalled) {
554*59036814SCostin Stroie            unset($data['tools']);
555*59036814SCostin Stroie            unset($data['tool_choice']);
556*59036814SCostin Stroie        }
557*59036814SCostin Stroie
558*59036814SCostin Stroie        // Initialize and configure cURL for the API request
559*59036814SCostin Stroie        $ch = curl_init();
560*59036814SCostin Stroie        curl_setopt($ch, CURLOPT_URL, $this->api_url);
561*59036814SCostin Stroie        curl_setopt($ch, CURLOPT_POST, true);
562*59036814SCostin Stroie        curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($data));
563*59036814SCostin Stroie        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
564*59036814SCostin Stroie        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
565*59036814SCostin Stroie        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
566*59036814SCostin Stroie        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
567*59036814SCostin Stroie
568*59036814SCostin Stroie        // Execute the API request
569*59036814SCostin Stroie        $response = curl_exec($ch);
570*59036814SCostin Stroie        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
571*59036814SCostin Stroie        $error = curl_error($ch);
572*59036814SCostin Stroie        curl_close($ch);
573*59036814SCostin Stroie
574*59036814SCostin Stroie        // Handle cURL errors
575*59036814SCostin Stroie        if ($error) {
576*59036814SCostin Stroie            throw new Exception('API request failed: ' . $error);
577*59036814SCostin Stroie        }
578*59036814SCostin Stroie
579*59036814SCostin Stroie        // Handle HTTP errors
580*59036814SCostin Stroie        if ($httpCode !== 200) {
581*59036814SCostin Stroie            throw new Exception('API request failed with HTTP code: ' . $httpCode);
582*59036814SCostin Stroie        }
583*59036814SCostin Stroie
584*59036814SCostin Stroie        // Parse and validate the JSON response
585*59036814SCostin Stroie        $result = json_decode($response, true);
586*59036814SCostin Stroie
587*59036814SCostin Stroie        // Extract the content from the response if available
588*59036814SCostin Stroie        if (isset($result['choices'][0]['message']['content'])) {
589*59036814SCostin Stroie            $content = trim($result['choices'][0]['message']['content']);
590*59036814SCostin Stroie            // Reset tool call counts when we get final content
591*59036814SCostin Stroie            $this->toolCallCounts = [];
592*59036814SCostin Stroie            return $content;
593*59036814SCostin Stroie        }
594*59036814SCostin Stroie
595*59036814SCostin Stroie        // Handle tool calls if present
596*59036814SCostin Stroie        if ($useTools && isset($result['choices'][0]['message']['tool_calls'])) {
597*59036814SCostin Stroie            $toolCalls = $result['choices'][0]['message']['tool_calls'];
598*59036814SCostin Stroie            // Start with original messages
599*59036814SCostin Stroie            $messages = $data['messages'];
600*59036814SCostin Stroie            // Add assistant's message with tool calls, keeping all original fields except for content (which is null)
601*59036814SCostin Stroie            $assistantMessage = [];
602*59036814SCostin Stroie            foreach ($result['choices'][0]['message'] as $key => $value) {
603*59036814SCostin Stroie                if ($key !== 'content') {
604*59036814SCostin Stroie                    $assistantMessage[$key] = $value;
605*59036814SCostin Stroie                }
606*59036814SCostin Stroie            }
607*59036814SCostin Stroie            // Add assistant's message with tool calls
608*59036814SCostin Stroie            $messages[] = $assistantMessage;
609*59036814SCostin Stroie
610*59036814SCostin Stroie            // Process each tool call and track counts to prevent infinite loops
611*59036814SCostin Stroie            foreach ($toolCalls as $toolCall) {
612*59036814SCostin Stroie                $toolName = $toolCall['function']['name'];
613*59036814SCostin Stroie                // Increment tool call count
614*59036814SCostin Stroie                if (!isset($this->toolCallCounts[$toolName])) {
615*59036814SCostin Stroie                    $this->toolCallCounts[$toolName] = 0;
616*59036814SCostin Stroie                }
617*59036814SCostin Stroie                $this->toolCallCounts[$toolName]++;
618*59036814SCostin Stroie
619*59036814SCostin Stroie                $toolResponse = $this->handleToolCall($toolCall);
620*59036814SCostin Stroie                $messages[] = $toolResponse;
621*59036814SCostin Stroie            }
622*59036814SCostin Stroie
623*59036814SCostin Stroie            // Check if any tool has been called more than 3 times
624*59036814SCostin Stroie            $toolsCalledCount = 0;
625*59036814SCostin Stroie            foreach ($this->toolCallCounts as $count) {
626*59036814SCostin Stroie                if ($count > 3) {
627*59036814SCostin Stroie                    // If any tool called more than 3 times, disable tools to break loop
628*59036814SCostin Stroie                    $toolsCalled = true;
629*59036814SCostin Stroie                    break;
630*59036814SCostin Stroie                }
631*59036814SCostin Stroie                $toolsCalledCount += $count;
632*59036814SCostin Stroie            }
633*59036814SCostin Stroie
634*59036814SCostin Stroie            // If total tool calls exceed 10, also disable tools
635*59036814SCostin Stroie            if ($toolsCalledCount > 10) {
636*59036814SCostin Stroie                $toolsCalled = true;
637*59036814SCostin Stroie            }
638*59036814SCostin Stroie
639*59036814SCostin Stroie            // Make another API call with tool responses
640*59036814SCostin Stroie            $data['messages'] = $messages;
641*59036814SCostin Stroie            return $this->callAPIWithTools($data, $toolsCalled, $useTools);
642*59036814SCostin Stroie        }
643*59036814SCostin Stroie
644*59036814SCostin Stroie        // Throw exception for unexpected response format
645*59036814SCostin Stroie        throw new Exception('Unexpected API response format');
646*59036814SCostin Stroie    }
647*59036814SCostin Stroie
648*59036814SCostin Stroie    /**
649*59036814SCostin Stroie     * Load a prompt template from a DokuWiki page and replace placeholders
650*59036814SCostin Stroie     *
651*59036814SCostin Stroie     * Loads prompt templates from DokuWiki pages with IDs in the format
652*59036814SCostin Stroie     * dokullm:prompts:LANGUAGE:PROMPT_NAME
653*59036814SCostin Stroie     *
654*59036814SCostin Stroie     * The method implements a language fallback mechanism:
655*59036814SCostin Stroie     * 1. First tries to load the prompt in the configured language
656*59036814SCostin Stroie     * 2. If not found, falls back to English prompts
657*59036814SCostin Stroie     * 3. Throws an exception if neither is available
658*59036814SCostin Stroie     *
659*59036814SCostin Stroie     * After loading the prompt, it scans for placeholders and automatically
660*59036814SCostin Stroie     * adds missing ones with appropriate values before replacing all placeholders.
661*59036814SCostin Stroie     *
662*59036814SCostin Stroie     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
663*59036814SCostin Stroie     * @param array $variables Associative array of placeholder => value pairs
664*59036814SCostin Stroie     * @return string The processed prompt with placeholders replaced
665*59036814SCostin Stroie     * @throws Exception If the prompt page cannot be loaded in any language
666*59036814SCostin Stroie     */
667*59036814SCostin Stroie    private function loadPrompt($promptName, $variables = [])
668*59036814SCostin Stroie    {
669*59036814SCostin Stroie        global $conf;
670*59036814SCostin Stroie        $language = $conf['plugin']['dokullm']['language'];
671*59036814SCostin Stroie
672*59036814SCostin Stroie        // Default to 'en' if language is 'default' or not set
673*59036814SCostin Stroie        if ($language === 'default' || empty($language)) {
674*59036814SCostin Stroie            $language = 'en';
675*59036814SCostin Stroie        }
676*59036814SCostin Stroie
677*59036814SCostin Stroie        // Construct the page ID for the prompt in the configured language
678*59036814SCostin Stroie        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
679*59036814SCostin Stroie
680*59036814SCostin Stroie        // Try to get the content of the prompt page in the configured language
681*59036814SCostin Stroie        $prompt = $this->getPageContent($promptPageId);
682*59036814SCostin Stroie
683*59036814SCostin Stroie        // If the language-specific prompt doesn't exist, try English as fallback
684*59036814SCostin Stroie        if ($prompt === false && $language !== 'en') {
685*59036814SCostin Stroie            $promptPageId = 'dokullm:prompts:en:' . $promptName;
686*59036814SCostin Stroie            $prompt = $this->getPageContent($promptPageId);
687*59036814SCostin Stroie        }
688*59036814SCostin Stroie
689*59036814SCostin Stroie        // If still no prompt found, throw an exception
690*59036814SCostin Stroie        if ($prompt === false) {
691*59036814SCostin Stroie            throw new Exception('Prompt page not found: ' . $promptPageId);
692*59036814SCostin Stroie        }
693*59036814SCostin Stroie
694*59036814SCostin Stroie        // Find placeholders in the prompt
695*59036814SCostin Stroie        $placeholders = $this->findPlaceholders($prompt);
696*59036814SCostin Stroie
697*59036814SCostin Stroie        // Add missing placeholders with appropriate values
698*59036814SCostin Stroie        foreach ($placeholders as $placeholder) {
699*59036814SCostin Stroie            // Skip if already provided in variables
700*59036814SCostin Stroie            if (isset($variables[$placeholder])) {
701*59036814SCostin Stroie                continue;
702*59036814SCostin Stroie            }
703*59036814SCostin Stroie
704*59036814SCostin Stroie            // Add appropriate values for specific placeholders
705*59036814SCostin Stroie            switch ($placeholder) {
706*59036814SCostin Stroie                case 'template':
707*59036814SCostin Stroie                    // If we have a page_template in variables, use it
708*59036814SCostin Stroie                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template']);
709*59036814SCostin Stroie                    break;
710*59036814SCostin Stroie
711*59036814SCostin Stroie                case 'snippets':
712*59036814SCostin Stroie                    $variables[$placeholder] = $this->getSnippets(10);
713*59036814SCostin Stroie                    break;
714*59036814SCostin Stroie
715*59036814SCostin Stroie                case 'examples':
716*59036814SCostin Stroie                    // If we have example page IDs in metadata, add examples content
717*59036814SCostin Stroie                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples']);
718*59036814SCostin Stroie                    break;
719*59036814SCostin Stroie
720*59036814SCostin Stroie                case 'previous':
721*59036814SCostin Stroie                    // If we have a previous report page ID in metadata, add previous content
722*59036814SCostin Stroie                    $variables[$placeholder] = $this->getPreviousContent($variables['page_previous']);
723*59036814SCostin Stroie
724*59036814SCostin Stroie                    // Add current and previous dates to metadata
725*59036814SCostin Stroie                    $variables['current_date'] = $this->getPageDate();
726*59036814SCostin Stroie                    $variables['previous_date'] = !empty($variables['page_previous']) ?
727*59036814SCostin Stroie                                                $this->getPageDate($variables['page_previous']) :
728*59036814SCostin Stroie                                                '';
729*59036814SCostin Stroie                    break;
730*59036814SCostin Stroie
731*59036814SCostin Stroie                default:
732*59036814SCostin Stroie                    // For other placeholders, leave them empty or set a default value
733*59036814SCostin Stroie                    $variables[$placeholder] = '';
734*59036814SCostin Stroie                    break;
735*59036814SCostin Stroie            }
736*59036814SCostin Stroie        }
737*59036814SCostin Stroie
738*59036814SCostin Stroie        // Replace placeholders with actual values
739*59036814SCostin Stroie        // Placeholders are in the format {placeholder_name}
740*59036814SCostin Stroie        foreach ($variables as $placeholder => $value) {
741*59036814SCostin Stroie            $prompt = str_replace('{' . $placeholder . '}', $value, $prompt);
742*59036814SCostin Stroie        }
743*59036814SCostin Stroie
744*59036814SCostin Stroie        // Return the processed prompt
745*59036814SCostin Stroie        return $prompt;
746*59036814SCostin Stroie    }
747*59036814SCostin Stroie
748*59036814SCostin Stroie    /**
749*59036814SCostin Stroie     * Load system prompt with optional command-specific appendage
750*59036814SCostin Stroie     *
751*59036814SCostin Stroie     * Loads the main system prompt and appends any command-specific system prompt
752*59036814SCostin Stroie     * if available.
753*59036814SCostin Stroie     *
754*59036814SCostin Stroie     * @param string $action The action/command name
755*59036814SCostin Stroie     * @param array $variables Associative array of placeholder => value pairs
756*59036814SCostin Stroie     * @return string The combined system prompt
757*59036814SCostin Stroie     */
758*59036814SCostin Stroie    private function loadSystemPrompt($action, $variables = [])
759*59036814SCostin Stroie    {
760*59036814SCostin Stroie        // Load system prompt which provides general instructions to the LLM
761*59036814SCostin Stroie        $systemPrompt = $this->loadPrompt('system', $variables);
762*59036814SCostin Stroie
763*59036814SCostin Stroie        // Check if there's a command-specific system prompt appendage
764*59036814SCostin Stroie        if (!empty($action)) {
765*59036814SCostin Stroie            try {
766*59036814SCostin Stroie                $commandSystemPrompt = $this->loadPrompt($action . ':system', $variables);
767*59036814SCostin Stroie                if ($commandSystemPrompt !== false) {
768*59036814SCostin Stroie                    $systemPrompt .= "\n" . $commandSystemPrompt;
769*59036814SCostin Stroie                }
770*59036814SCostin Stroie            } catch (Exception $e) {
771*59036814SCostin Stroie                // Ignore exceptions when loading command-specific system prompt
772*59036814SCostin Stroie                // This allows the main system prompt to still be used
773*59036814SCostin Stroie            }
774*59036814SCostin Stroie        }
775*59036814SCostin Stroie
776*59036814SCostin Stroie        return $systemPrompt;
777*59036814SCostin Stroie    }
778*59036814SCostin Stroie
779*59036814SCostin Stroie    /**
780*59036814SCostin Stroie     * Get the content of a DokuWiki page
781*59036814SCostin Stroie     *
782*59036814SCostin Stroie     * Retrieves the raw content of a DokuWiki page by its ID.
783*59036814SCostin Stroie     * Used for loading template and example page content for context.
784*59036814SCostin Stroie     *
785*59036814SCostin Stroie     * @param string $pageId The page ID to retrieve
786*59036814SCostin Stroie     * @return string|false The page content or false if not found/readable
787*59036814SCostin Stroie     */
788*59036814SCostin Stroie    public function getPageContent($pageId)
789*59036814SCostin Stroie    {
790*59036814SCostin Stroie        // Convert page ID to file path
791*59036814SCostin Stroie        $pageFile = wikiFN($pageId);
792*59036814SCostin Stroie
793*59036814SCostin Stroie        // Check if file exists and is readable
794*59036814SCostin Stroie        if (file_exists($pageFile) && is_readable($pageFile)) {
795*59036814SCostin Stroie            return file_get_contents($pageFile);
796*59036814SCostin Stroie        }
797*59036814SCostin Stroie
798*59036814SCostin Stroie        return false;
799*59036814SCostin Stroie    }
800*59036814SCostin Stroie
801*59036814SCostin Stroie    /**
802*59036814SCostin Stroie     * Extract date from page ID or file timestamp
803*59036814SCostin Stroie     *
804*59036814SCostin Stroie     * Attempts to extract a date in YYmmdd format from the page ID.
805*59036814SCostin Stroie     * If not found, uses the file's last modification timestamp.
806*59036814SCostin Stroie     *
807*59036814SCostin Stroie     * @param string $pageId Optional page ID to extract date from (defaults to current page)
808*59036814SCostin Stroie     * @return string Formatted date string (YYYY-MM-DD)
809*59036814SCostin Stroie     */
810*59036814SCostin Stroie    private function getPageDate($pageId = null)
811*59036814SCostin Stroie    {
812*59036814SCostin Stroie        global $ID;
813*59036814SCostin Stroie
814*59036814SCostin Stroie        // Use provided page ID or current page ID
815*59036814SCostin Stroie        $targetPageId = $pageId ?: $ID;
816*59036814SCostin Stroie
817*59036814SCostin Stroie        // Try to extract date from page ID (looking for YYmmdd pattern)
818*59036814SCostin Stroie        if (preg_match('/(\d{2})(\d{2})(\d{2})/', $targetPageId, $matches)) {
819*59036814SCostin Stroie            // Convert YYmmdd to YYYY-MM-DD
820*59036814SCostin Stroie            $year = $matches[1];
821*59036814SCostin Stroie            $month = $matches[2];
822*59036814SCostin Stroie            $day = $matches[3];
823*59036814SCostin Stroie
824*59036814SCostin Stroie            // Assume 20xx for years 00-69, 19xx for years 70-99
825*59036814SCostin Stroie            $fullYear = intval($year) <= 69 ? '20' . $year : '19' . $year;
826*59036814SCostin Stroie
827*59036814SCostin Stroie            return $fullYear . '-' . $month . '-' . $day;
828*59036814SCostin Stroie        }
829*59036814SCostin Stroie
830*59036814SCostin Stroie        // Fallback to file timestamp
831*59036814SCostin Stroie        $pageFile = wikiFN($targetPageId);
832*59036814SCostin Stroie        if (file_exists($pageFile)) {
833*59036814SCostin Stroie            $timestamp = filemtime($pageFile);
834*59036814SCostin Stroie            return date('Y-m-d', $timestamp);
835*59036814SCostin Stroie        }
836*59036814SCostin Stroie
837*59036814SCostin Stroie        // Return empty string if no date can be determined
838*59036814SCostin Stroie        return '';
839*59036814SCostin Stroie    }
840*59036814SCostin Stroie
841*59036814SCostin Stroie    /**
842*59036814SCostin Stroie     * Get current text
843*59036814SCostin Stroie     *
844*59036814SCostin Stroie     * Retrieves the current text stored from the process function.
845*59036814SCostin Stroie     *
846*59036814SCostin Stroie     * @return string The current text
847*59036814SCostin Stroie     */
848*59036814SCostin Stroie    private function getCurrentText()
849*59036814SCostin Stroie    {
850*59036814SCostin Stroie        return $this->currentText;
851*59036814SCostin Stroie    }
852*59036814SCostin Stroie
853*59036814SCostin Stroie    /**
854*59036814SCostin Stroie     * Scan text for placeholders
855*59036814SCostin Stroie     *
856*59036814SCostin Stroie     * Finds all placeholders in the format {placeholder_name} in the provided text
857*59036814SCostin Stroie     * and returns an array of unique placeholder names.
858*59036814SCostin Stroie     *
859*59036814SCostin Stroie     * @param string $text The text to scan for placeholders
860*59036814SCostin Stroie     * @return array List of unique placeholder names found in the text
861*59036814SCostin Stroie     */
862*59036814SCostin Stroie    public function findPlaceholders($text)
863*59036814SCostin Stroie    {
864*59036814SCostin Stroie        $placeholders = [];
865*59036814SCostin Stroie        $pattern = '/\{([^}]+)\}/';
866*59036814SCostin Stroie
867*59036814SCostin Stroie        if (preg_match_all($pattern, $text, $matches)) {
868*59036814SCostin Stroie            // Get unique placeholder names
869*59036814SCostin Stroie            $placeholders = array_unique($matches[1]);
870*59036814SCostin Stroie        }
871*59036814SCostin Stroie
872*59036814SCostin Stroie        return $placeholders;
873*59036814SCostin Stroie    }
874*59036814SCostin Stroie
875*59036814SCostin Stroie    /**
876*59036814SCostin Stroie     * Get template content for the current text
877*59036814SCostin Stroie     *
878*59036814SCostin Stroie     * Convenience function to retrieve template content. If a pageId is provided,
879*59036814SCostin Stroie     * retrieves content directly from that page. Otherwise, queries ChromaDB for
880*59036814SCostin Stroie     * a relevant template based on the current text.
881*59036814SCostin Stroie     *
882*59036814SCostin Stroie     * @param string|null $pageId Optional page ID to retrieve template from directly
883*59036814SCostin Stroie     * @return string The template content or empty string if not found
884*59036814SCostin Stroie     */
885*59036814SCostin Stroie    private function getTemplateContent($pageId = null)
886*59036814SCostin Stroie    {
887*59036814SCostin Stroie        // If pageId is provided, use it directly
888*59036814SCostin Stroie        if ($pageId !== null) {
889*59036814SCostin Stroie            $templateContent = $this->getPageContent($pageId);
890*59036814SCostin Stroie            if ($templateContent !== false) {
891*59036814SCostin Stroie                return $templateContent;
892*59036814SCostin Stroie            }
893*59036814SCostin Stroie        }
894*59036814SCostin Stroie
895*59036814SCostin Stroie        // Otherwise, get template suggestion for the current text
896*59036814SCostin Stroie        $pageId = $this->queryChromaDBTemplate($this->getCurrentText());
897*59036814SCostin Stroie        if (!empty($pageId)) {
898*59036814SCostin Stroie            $templateContent = $this->getPageContent($pageId[0]);
899*59036814SCostin Stroie            if ($templateContent !== false) {
900*59036814SCostin Stroie                return $templateContent;
901*59036814SCostin Stroie            }
902*59036814SCostin Stroie        }
903*59036814SCostin Stroie        return '( no template )';
904*59036814SCostin Stroie    }
905*59036814SCostin Stroie
906*59036814SCostin Stroie    /**
907*59036814SCostin Stroie     * Get snippets content for the current text
908*59036814SCostin Stroie     *
909*59036814SCostin Stroie     * Convenience function to retrieve relevant snippets for the current text.
910*59036814SCostin Stroie     * Queries ChromaDB for relevant snippets and returns them formatted.
911*59036814SCostin Stroie     *
912*59036814SCostin Stroie     * @param int $count Number of snippets to retrieve (default: 10)
913*59036814SCostin Stroie     * @return string Formatted snippets content or empty string if not found
914*59036814SCostin Stroie     */
915*59036814SCostin Stroie    private function getSnippets($count = 10)
916*59036814SCostin Stroie    {
917*59036814SCostin Stroie        // Get example snippets for the current text
918*59036814SCostin Stroie        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
919*59036814SCostin Stroie        if (!empty($snippets)) {
920*59036814SCostin Stroie            $formattedSnippets = [];
921*59036814SCostin Stroie            foreach ($snippets as $index => $snippet) {
922*59036814SCostin Stroie                $formattedSnippets[] = '<example id="' . ($index + 1) . '">\n' . $snippet . '\n</example>';
923*59036814SCostin Stroie            }
924*59036814SCostin Stroie            return implode("\n", $formattedSnippets);
925*59036814SCostin Stroie        }
926*59036814SCostin Stroie        return '( no examples )';
927*59036814SCostin Stroie    }
928*59036814SCostin Stroie
929*59036814SCostin Stroie    /**
930*59036814SCostin Stroie     * Get examples content from example page IDs
931*59036814SCostin Stroie     *
932*59036814SCostin Stroie     * Convenience function to retrieve content from example pages.
933*59036814SCostin Stroie     * Returns the content of each page packed in XML elements.
934*59036814SCostin Stroie     *
935*59036814SCostin Stroie     * @param array $exampleIds List of example page IDs
936*59036814SCostin Stroie     * @return string Formatted examples content or empty string if not found
937*59036814SCostin Stroie     */
938*59036814SCostin Stroie    private function getExamplesContent($exampleIds = [])
939*59036814SCostin Stroie    {
940*59036814SCostin Stroie        if (empty($exampleIds) || !is_array($exampleIds)) {
941*59036814SCostin Stroie            return '( no examples )';
942*59036814SCostin Stroie        }
943*59036814SCostin Stroie
944*59036814SCostin Stroie        $examplesContent = [];
945*59036814SCostin Stroie        foreach ($exampleIds as $index => $exampleId) {
946*59036814SCostin Stroie            $content = $this->getPageContent($exampleId);
947*59036814SCostin Stroie            if ($content !== false) {
948*59036814SCostin Stroie                $examplesContent[] = '<example_page source="' . $exampleId . '">\n' . $content . '\n</example_page>';
949*59036814SCostin Stroie            }
950*59036814SCostin Stroie        }
951*59036814SCostin Stroie
952*59036814SCostin Stroie        return implode("\n", $examplesContent);
953*59036814SCostin Stroie    }
954*59036814SCostin Stroie
955*59036814SCostin Stroie    /**
956*59036814SCostin Stroie     * Get previous report content from previous page ID
957*59036814SCostin Stroie     *
958*59036814SCostin Stroie     * Convenience function to retrieve content from a previous report page.
959*59036814SCostin Stroie     * Returns the content of the previous page or a default message if not found.
960*59036814SCostin Stroie     *
961*59036814SCostin Stroie     * @param string $previousId Previous page ID
962*59036814SCostin Stroie     * @return string Previous report content or default message if not found
963*59036814SCostin Stroie     */
964*59036814SCostin Stroie    private function getPreviousContent($previousId = '')
965*59036814SCostin Stroie    {
966*59036814SCostin Stroie        if (empty($previousId)) {
967*59036814SCostin Stroie            return '( no previous report )';
968*59036814SCostin Stroie        }
969*59036814SCostin Stroie
970*59036814SCostin Stroie        $content = $this->getPageContent($previousId);
971*59036814SCostin Stroie        if ($content !== false) {
972*59036814SCostin Stroie            return $content;
973*59036814SCostin Stroie        }
974*59036814SCostin Stroie
975*59036814SCostin Stroie        return '( previous report not found )';
976*59036814SCostin Stroie    }
977*59036814SCostin Stroie
978*59036814SCostin Stroie    /**
979*59036814SCostin Stroie     * Get ChromaDB client with configuration
980*59036814SCostin Stroie     *
981*59036814SCostin Stroie     * Creates and returns a ChromaDB client with the appropriate configuration.
982*59036814SCostin Stroie     * Extracts modality from the current page ID to use as the collection name.
983*59036814SCostin Stroie     *
984*59036814SCostin Stroie     * @return array Array containing the ChromaDB client and collection name
985*59036814SCostin Stroie     */
986*59036814SCostin Stroie    private function getChromaDBClient()
987*59036814SCostin Stroie    {
988*59036814SCostin Stroie        // Include config.php to get ChromaDB configuration
989*59036814SCostin Stroie        require_once 'config.php';
990*59036814SCostin Stroie
991*59036814SCostin Stroie        // Get ChromaDB configuration from config.php
992*59036814SCostin Stroie        $chromaHost = defined('CHROMA_HOST') ? CHROMA_HOST : 'localhost';
993*59036814SCostin Stroie        $chromaPort = defined('CHROMA_PORT') ? CHROMA_PORT : 8000;
994*59036814SCostin Stroie        $chromaTenant = defined('CHROMA_TENANT') ? CHROMA_TENANT : 'dokullm';
995*59036814SCostin Stroie        $chromaDatabase = defined('CHROMA_DATABASE') ? CHROMA_DATABASE : 'dokullm';
996*59036814SCostin Stroie        $chromaDefaultCollection = defined('CHROMA_COLLECTION') ? CHROMA_COLLECTION : 'documents';
997*59036814SCostin Stroie
998*59036814SCostin Stroie        // Use the first part of the current page ID as collection name, fallback to default
999*59036814SCostin Stroie        global $ID;
1000*59036814SCostin Stroie        $chromaCollection = $chromaDefaultCollection; // Default collection name
1001*59036814SCostin Stroie
1002*59036814SCostin Stroie        if (!empty($ID)) {
1003*59036814SCostin Stroie            // Split the page ID by ':' and take the first part as collection name
1004*59036814SCostin Stroie            $parts = explode(':', $ID);
1005*59036814SCostin Stroie            if (isset($parts[0]) && !empty($parts[0])) {
1006*59036814SCostin Stroie                // If the first part is 'playground', use the default collection
1007*59036814SCostin Stroie                // Otherwise, use the first part as the collection name
1008*59036814SCostin Stroie                if ($parts[0] === 'playground') {
1009*59036814SCostin Stroie                    $chromaCollection = $chromaDefaultCollection;
1010*59036814SCostin Stroie                } else {
1011*59036814SCostin Stroie                    $chromaCollection = $parts[0];
1012*59036814SCostin Stroie                }
1013*59036814SCostin Stroie            }
1014*59036814SCostin Stroie        }
1015*59036814SCostin Stroie
1016*59036814SCostin Stroie        // Create ChromaDB client
1017*59036814SCostin Stroie        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient($chromaHost, $chromaPort, $chromaTenant, $chromaDatabase);
1018*59036814SCostin Stroie
1019*59036814SCostin Stroie
1020*59036814SCostin Stroie        return [$chromaClient, $chromaCollection];
1021*59036814SCostin Stroie    }
1022*59036814SCostin Stroie
1023*59036814SCostin Stroie    /**
1024*59036814SCostin Stroie     * Query ChromaDB for relevant documents
1025*59036814SCostin Stroie     *
1026*59036814SCostin Stroie     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1027*59036814SCostin Stroie     * Extracts modality from the current page ID to use as the collection name.
1028*59036814SCostin Stroie     *
1029*59036814SCostin Stroie     * @param string $text The text to find similar documents for
1030*59036814SCostin Stroie     * @param int $limit Maximum number of documents to retrieve (default: 5)
1031*59036814SCostin Stroie     * @param array|null $where Optional filter conditions for metadata
1032*59036814SCostin Stroie     * @return array List of document IDs
1033*59036814SCostin Stroie     */
1034*59036814SCostin Stroie    private function queryChromaDB($text, $limit = 5, $where = null)
1035*59036814SCostin Stroie    {
1036*59036814SCostin Stroie        try {
1037*59036814SCostin Stroie            // Get ChromaDB client and collection name
1038*59036814SCostin Stroie            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1039*59036814SCostin Stroie            // Query for similar documents
1040*59036814SCostin Stroie            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1041*59036814SCostin Stroie
1042*59036814SCostin Stroie            // Extract document IDs from results
1043*59036814SCostin Stroie            $documentIds = [];
1044*59036814SCostin Stroie            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
1045*59036814SCostin Stroie                foreach ($results['ids'][0] as $id) {
1046*59036814SCostin Stroie                    // Use the ChromaDB ID directly without conversion
1047*59036814SCostin Stroie                    $documentIds[] = $id;
1048*59036814SCostin Stroie                }
1049*59036814SCostin Stroie            }
1050*59036814SCostin Stroie
1051*59036814SCostin Stroie            return $documentIds;
1052*59036814SCostin Stroie        } catch (Exception $e) {
1053*59036814SCostin Stroie            // Log error but don't fail the operation
1054*59036814SCostin Stroie            error_log('ChromaDB query failed: ' . $e->getMessage());
1055*59036814SCostin Stroie            return [];
1056*59036814SCostin Stroie        }
1057*59036814SCostin Stroie    }
1058*59036814SCostin Stroie
1059*59036814SCostin Stroie    /**
1060*59036814SCostin Stroie     * Query ChromaDB for relevant documents and return text snippets
1061*59036814SCostin Stroie     *
1062*59036814SCostin Stroie     * Generates embeddings for the input text and queries ChromaDB for similar documents.
1063*59036814SCostin Stroie     * Returns the actual text snippets instead of document IDs.
1064*59036814SCostin Stroie     *
1065*59036814SCostin Stroie     * @param string $text The text to find similar documents for
1066*59036814SCostin Stroie     * @param int $limit Maximum number of documents to retrieve (default: 10)
1067*59036814SCostin Stroie     * @param array|null $where Optional filter conditions for metadata
1068*59036814SCostin Stroie     * @return array List of text snippets
1069*59036814SCostin Stroie     */
1070*59036814SCostin Stroie    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
1071*59036814SCostin Stroie    {
1072*59036814SCostin Stroie        try {
1073*59036814SCostin Stroie            // Get ChromaDB client and collection name
1074*59036814SCostin Stroie            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
1075*59036814SCostin Stroie            // Query for similar documents
1076*59036814SCostin Stroie            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);
1077*59036814SCostin Stroie
1078*59036814SCostin Stroie            // Extract document texts from results
1079*59036814SCostin Stroie            $snippets = [];
1080*59036814SCostin Stroie            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
1081*59036814SCostin Stroie                foreach ($results['documents'][0] as $document) {
1082*59036814SCostin Stroie                    $snippets[] = $document;
1083*59036814SCostin Stroie                }
1084*59036814SCostin Stroie            }
1085*59036814SCostin Stroie
1086*59036814SCostin Stroie            return $snippets;
1087*59036814SCostin Stroie        } catch (Exception $e) {
1088*59036814SCostin Stroie            // Log error but don't fail the operation
1089*59036814SCostin Stroie            error_log('ChromaDB query failed: ' . $e->getMessage());
1090*59036814SCostin Stroie            return [];
1091*59036814SCostin Stroie        }
1092*59036814SCostin Stroie    }
1093*59036814SCostin Stroie
1094*59036814SCostin Stroie    /**
1095*59036814SCostin Stroie     * Query ChromaDB for a template document
1096*59036814SCostin Stroie     *
1097*59036814SCostin Stroie     * Generates embeddings for the input text and queries ChromaDB for a template document
1098*59036814SCostin Stroie     * by filtering with metadata 'template=true'.
1099*59036814SCostin Stroie     *
1100*59036814SCostin Stroie     * @param string $text The text to find a template for
1101*59036814SCostin Stroie     * @return array List of template document IDs (maximum 1)
1102*59036814SCostin Stroie     */
1103*59036814SCostin Stroie    public function queryChromaDBTemplate($text)
1104*59036814SCostin Stroie    {
1105*59036814SCostin Stroie        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);
1106*59036814SCostin Stroie
1107*59036814SCostin Stroie        // Remove chunk number (e.g., "@2") from the ID to get the base document ID
1108*59036814SCostin Stroie        if (!empty($templateIds)) {
1109*59036814SCostin Stroie            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
1110*59036814SCostin Stroie        }
1111*59036814SCostin Stroie
1112*59036814SCostin Stroie        return $templateIds;
1113*59036814SCostin Stroie    }
1114*59036814SCostin Stroie
1115*59036814SCostin Stroie}
1116