<?php

namespace dokuwiki\plugin\dokullm;

// Unqualified class names never fall back to the global namespace, so the
// `new Exception(...)` calls made by this class need an explicit import.
use Exception;

/**
 * LLM Client for the dokullm plugin
 *
 * This class provides methods to interact with an LLM API for various
 * text processing tasks such as completion, rewriting, grammar correction,
 * summarization, conclusion creation, text analysis, and custom prompts.
 *
 * The client handles:
 * - API configuration and authentication
 * - Prompt template loading and processing
 * - Context-aware requests with metadata
 * - DokuWiki page content retrieval
 */

// must be run within Dokuwiki
if (!defined('DOKU_INC')) {
    die();
}


/**
 * LLM Client class for handling API communications
 *
 * Manages configuration settings and provides methods for various
 * text processing operations through an LLM API.
 * Implements caching for tool calls to avoid duplicate processing.
 */
class LlmClient
{
    /** @var string|null The API endpoint URL */
    private $api_url;

    /** @var array Cache for tool call results */
    private $toolCallCache = [];

    /** @var string Current text for tool usage */
    private $currentText = '';

    /** @var array Track tool call counts to prevent infinite loops */
    private $toolCallCounts = [];

    /** @var string|null The API authentication key */
    private $api_key;

    /** @var string|null The model identifier to use */
    private $model;

    /** @var int|null The request timeout in seconds */
    private $timeout;

    /** @var float|null The temperature setting for response randomness */
    private $temperature;

    /** @var float|null The top-p setting for nucleus sampling */
    private $top_p;

    /** @var int|null The top-k setting for token selection */
    private $top_k;

    /** @var float|null The min-p setting for minimum probability threshold */
    private $min_p;

    /** @var bool Whether to enable thinking in the LLM responses */
    private $think;

    /**
     * Initialize the LLM client with configuration settings
     *
     * Reads the plugin settings from the global DokuWiki configuration array
     * ($conf['plugin']['dokullm']). A setting that is absent is stored as
     * null (false for "think") instead of raising an undefined-key warning.
     *
     * Configuration values read here:
     * - api_url: The LLM API endpoint URL
     * - api_key: Authentication key for the API (optional)
     * - model: The model identifier to use for requests
     * - timeout: Request timeout in seconds
     * - temperature: Temperature setting for response randomness
     * - top_p: Top-p (nucleus sampling) setting
     * - top_k: Top-k setting
     * - min_p: Minimum probability threshold
     * - think: Whether to enable thinking in LLM responses (boolean)
     */
    public function __construct()
    {
        global $conf;

        // Tolerate a missing plugin section as well as missing individual keys
        $settings = $conf['plugin']['dokullm'] ?? [];

        $this->api_url = $settings['api_url'] ?? null;
        $this->api_key = $settings['api_key'] ?? null;
        $this->model = $settings['model'] ?? null;
        $this->timeout = $settings['timeout'] ?? null;
        $this->temperature = $settings['temperature'] ?? null;
        $this->top_p = $settings['top_p'] ?? null;
        $this->top_k = $settings['top_k'] ?? null;
        $this->min_p = $settings['min_p'] ?? null;
        $this->think = $settings['think'] ?? false;
    }

    /**
     * Run a named action on a piece of text
     *
     * Remembers the text for later tool calls, adds "text", "think" and
     * "action" entries to the metadata, renames the "template", "examples"
     * and "previous" entries to "page_template", "page_examples" and
     * "page_previous", builds the prompt with loadPrompt() and sends it
     * through callAPI().
     *
     * NOTE(review): callAPI() builds its context block from the "template"
     * and "examples" keys, which have been renamed by the time it is called
     * from here - confirm that only "snippets" is meant to reach it.
     *
     * @param string $action The action name, used to select the prompt and system prompt
     * @param string $text The text to process
     * @param array $metadata Optional metadata passed on to the prompt and the API call
     * @param bool $useContext Whether callAPI() may add context from the metadata (default: true)
     * @return string The response content from the LLM
     */
    public function process($action, $text, $metadata = [], $useContext = true)
    {
        // Store the current text for tool usage
        $this->currentText = $text;

        // Add text, think and action to metadata
        $metadata['text'] = $text;
        $metadata['think'] = $this->think ? '/think' : '/no_think';
        $metadata['action'] = $action;

        // Move 'template', 'examples' and 'previous' to their 'page_' prefixed keys
        foreach (['template', 'examples', 'previous'] as $key) {
            if (isset($metadata[$key])) {
                $metadata['page_' . $key] = $metadata[$key];
                unset($metadata[$key]);
            }
        }

        $prompt = $this->loadPrompt($action, $metadata);

        return $this->callAPI($action, $prompt, $metadata, $useContext);
    }

/**
 * Create the provided text using the LLM
 *
 * Sends the "create" prompt to the LLM. When tools are disabled in the
 * plugin configuration, a template is looked up through
 * queryChromaDBTemplate() if the metadata does not name one, and the
 * results of queryChromaDBSnippets() are merged into the metadata snippets.
 * When tools are enabled, none of this is done up front because the LLM can
 * request templates and examples through tool calls.
 *
 * @param string $text The text to create
 * @param array $metadata Optional metadata containing template, examples, and snippets
 * @param bool $useContext Whether to include template and examples in the context (default: true)
 * @return string The created text
 */
public function createReport($text, $metadata = [], $useContext = true)
{
    // Store the current text for tool usage
    $this->currentText = $text;

    // Check if tools should be used based on configuration
    global $conf;
    $useTools = $conf['plugin']['dokullm']['use_tools'] ?? false;

    // Only try to find template and add snippets if tools are not enabled
    if (!$useTools) {
        // If no template is defined, try to find one using ChromaDB
        if (empty($metadata['template'])) {
            $templateResult = $this->queryChromaDBTemplate($text);
            if (!empty($templateResult)) {
                // Use the first result as template
                $metadata['template'] = $templateResult[0];
            }
        }

        // Query ChromaDB for relevant documents to use as examples
        $chromaResults = $this->queryChromaDBSnippets($text, 10);

        // Add ChromaDB results after any snippets the caller supplied
        if (!empty($chromaResults)) {
            $metadata['snippets'] = array_merge($metadata['snippets'] ?? [], $chromaResults);
        }
    }

    $think = $this->think ? '/think' : '/no_think';
    $prompt = $this->loadPrompt('create', ['text' => $text, 'think' => $think]);

    return $this->callAPI('create', $prompt, $metadata, $useContext);
}

/**
 * Compare two texts and highlight differences
 *
 * Builds the "compare" prompt from the current text and, when the metadata
 * names a previous report page, from that page's content and date.
 *
 * @param string $text The current text to compare
 * @param array $metadata Optional metadata; "previous_report_page" names the page to compare against
 * @param bool $useContext Whether to include template and examples in the context (default: false)
 * @return string The comparison results
 */
public function compareText($text, $metadata = [], $useContext = false)
{
    // Store the current text for tool usage
    $this->currentText = $text;

    // Load previous report from metadata if specified
    $previousText = '';
    $previousDate = '';
    if (!empty($metadata['previous_report_page'])) {
        $previousText = $this->getPageContent($metadata['previous_report_page']);
        if ($previousText === false) {
            // The page could not be read; compare against an empty text
            $previousText = '';
        }
        $previousDate = $this->getPageDate($metadata['previous_report_page']);
    }

    // Date of the current page for the prompt placeholder
    $currentDate = $this->getPageDate();

    $think = $this->think ? '/think' : '/no_think';
    $prompt = $this->loadPrompt('compare', [
        'text' => $text,
        'previous_text' => $previousText,
        'current_date' => $currentDate,
        'previous_date' => $previousDate,
        'think' => $think
    ]);

    return $this->callAPI('compare', $prompt, $metadata, $useContext);
}

/**
 * Process text with a custom user prompt
 *
 * The custom prompt is taken from $metadata['prompt'] and sent to the LLM
 * followed by the text. A missing "prompt" entry is treated as an empty
 * prompt.
 *
 * @param string $text The text to process
 * @param array $metadata Metadata holding the custom prompt under "prompt", plus optional template and examples
 * @param bool $useContext Whether to include template and examples in the context (default: true)
 * @return string The processed text
 */
public function processCustomPrompt($text, $metadata = [], $useContext = true)
{
    // Store the current text for tool usage
    $this->currentText = $text;

    // Format the prompt with the text and custom prompt
    $customPrompt = $metadata['prompt'] ?? '';
    $prompt = $customPrompt . "\n\nText to process:\n" . $text;

    return $this->callAPI('custom', $prompt, $metadata, $useContext);
}

/**
 * Get the list of available tools for the LLM
 *
 * Returns the function-tool definitions (get_document, get_template,
 * get_examples) that callAPI() attaches to the request when tools are
 * enabled.
 *
 * @return array List of tool definitions
 */
private function getAvailableTools()
{
    return [
        [
            'type' => 'function',
            'function' => [
                'name' => 'get_document',
                'description' => 'Retrieve the full content of a specific document by providing its unique document ID. Use this when you need to access the complete text of a particular document for reference or analysis.',
                'parameters' => [
                    'type' => 'object',
                    'properties' => [
                        'id' => [
                            'type' => 'string',
                            'description' => 'The unique identifier of the document to retrieve. This should be a valid document ID that exists in the system.'
                        ]
                    ],
                    'required' => ['id']
                ]
            ]
        ],
        [
            'type' => 'function',
            'function' => [
                'name' => 'get_template',
                'description' => 'Retrieve a relevant template document that matches the current context and content. Use this when you need a structural template or format example to base your response on, particularly for creating consistent reports or documents.',
                'parameters' => [
                    'type' => 'object',
                    'properties' => [
                        'language' => [
                            'type' => 'string',
                            'description' => 'The language the template should be written in (e.g., "ro" for Romanian, "en" for English).',
                            'default' => 'ro'
                        ]
                    ]
                ]
            ]
        ],
        [
            'type' => 'function',
            'function' => [
                'name' => 'get_examples',
                'description' => 'Retrieve relevant example snippets from previous reports that are similar to the current context. Use this when you need to see how similar content was previously handled, to maintain consistency in style, terminology, and structure.',
                'parameters' => [
                    'type' => 'object',
                    'properties' => [
                        'count' => [
                            'type' => 'integer',
                            'description' => 'The number of examples to retrieve (1-20). Use more examples when you need comprehensive reference material, fewer when you need just a quick reminder of the style.',
                            'default' => 5
                        ]
                    ]
                ]
            ]
        ]
    ];
}

/**
 * Build the request for the LLM API and send it
 *
 * Steps performed here:
 * 1. Load the system prompt for the command with loadSystemPrompt()
 * 2. When $useContext is true and the metadata has a template, examples or
 *    snippets, build a <context> block and put it in front of the user prompt
 * 3. Assemble the request data (model, system and user messages, fixed
 *    max_tokens/stream/keep_alive/think values)
 * 4. Attach the tool definitions when "use_tools" is enabled in the
 *    plugin configuration
 * 5. Add the sampling parameters that are not null
 * 6. Hand the request to callAPIWithTools(), which performs the HTTP call
 *
 * The context block may contain:
 * - Template content: the page named by $metadata['template']
 * - Example pages: full content of the pages listed in $metadata['examples']
 * - Text snippets: the strings in $metadata['snippets']
 *
 * @param string $command The command name for loading command-specific system prompts
 * @param string $prompt The prompt to send to the LLM as user message
 * @param array $metadata Optional metadata containing template, examples, and snippets
 * @param bool $useContext Whether to include template and examples in the context (default: true)
 * @return string The response content from the LLM
 * @throws Exception If the API request fails or returns unexpected format
 */
private function callAPI($command, $prompt, $metadata = [], $useContext = true)
{
    // Load system prompt which provides general instructions to the LLM
    $systemPrompt = $this->loadSystemPrompt($command, []);

    // Enhance the prompt with context information from metadata
    // This provides the LLM with additional context about templates and examples
    if ($useContext && !empty($metadata) && (!empty($metadata['template']) || !empty($metadata['examples']) || !empty($metadata['snippets']))) {
        $contextInfo = "\n\n<context>\n";

        // Add template content if specified in metadata
        if (!empty($metadata['template'])) {
            $templateContent = $this->getPageContent($metadata['template']);
            if ($templateContent !== false) {
                $contextInfo .= "\n\n<template>\nPornește de la acest template (" . $metadata['template'] . "):\n" . $templateContent . "\n</template>\n";
            }
        }

        // Add example pages content if specified in metadata
        if (!empty($metadata['examples'])) {
            $examplesContent = [];
            foreach ($metadata['examples'] as $example) {
                $content = $this->getPageContent($example);
                // Pages that cannot be read are left out
                if ($content !== false) {
                    $examplesContent[] = "\n<example_page source=\"" . $example . "\">\n" . $content . "\n</example_page>\n";
                }
            }
            if (!empty($examplesContent)) {
                $contextInfo .= "\n<style_examples>\nAcestea sunt rapoarte complete anterioare - studiază stilul meu de redactare:\n" . implode("\n", $examplesContent) . "\n</style_examples>\n";
            }
        }

        // Add text snippets if specified in metadata
        if (!empty($metadata['snippets'])) {
            $snippetsContent = [];
            foreach ($metadata['snippets'] as $index => $snippet) {
                // These are text snippets from ChromaDB, numbered from 1
                $snippetsContent[] = "\n<example id=\"" . ($index + 1) . "\">\n" . $snippet . "\n</example>\n";
            }
            if (!empty($snippetsContent)) {
                $contextInfo .= "\n\n<style_examples>\nAcestea sunt exemple din rapoartele mele anterioare - studiază stilul de redactare, terminologia și structura frazelor:\n" . implode("\n", $snippetsContent) . "\n</style_examples>\n";
            }
        }

        $contextInfo .= "\n</context>\n";

        // Put the context block in front of the user prompt
        $prompt = $contextInfo . "\n\n" . $prompt;
    }

    // Check if tools should be used based on configuration
    global $conf;
    $useTools = $conf['plugin']['dokullm']['use_tools'] ?? false;

    // Prepare API request data with model parameters
    // NOTE(review): 'think' is always sent as true, independent of the
    // configured $this->think flag - confirm this is intentional.
    $data = [
        'model' => $this->model,
        'messages' => [
            ['role' => 'system', 'content' => $systemPrompt],
            ['role' => 'user', 'content' => $prompt]
        ],
        'max_tokens' => 6144,
        'stream' => false,
        'keep_alive' => '30m',
        'think' => true
    ];

    // Add tools to the request only if useTools is true
    if ($useTools) {
        $data['tools'] = $this->getAvailableTools();
        $data['tool_choice'] = 'auto';
        $data['parallel_tool_calls'] = false;
    }

    // Only add parameters if they are defined and not null
    if ($this->temperature !== null) {
        $data['temperature'] = $this->temperature;
    }
    if ($this->top_p !== null) {
        $data['top_p'] = $this->top_p;
    }
    if ($this->top_k !== null) {
        $data['top_k'] = $this->top_k;
    }
    if ($this->min_p !== null) {
        $data['min_p'] = $this->min_p;
    }

    // Send the request; tool calls in the response are handled there
    return $this->callAPIWithTools($data, false);
}

/**
 * Handle tool calls from the LLM
 *
 * Executes one tool call requested by the LLM and returns the matching
 * "tool" role message. Results are cached per tool name and arguments, so
 * a repeated call with identical parameters is answered from the cache.
 *
 * Supported tools:
 * - get_document: content of the page named by the "id" argument
 * - get_template: result of getTemplateContent()
 * - get_examples: result of getSnippets() for "count" examples
 *   (default 5, limited to the advertised range 1-20)
 *
 * @param array $toolCall The tool call data from the LLM (id, function name, JSON arguments)
 * @return array The tool response message with role, tool_call_id, cached flag and content
 */
private function handleToolCall($toolCall)
{
    $toolName = $toolCall['function']['name'];

    // Arguments normally arrive as a JSON string; anything that does not
    // end up as an array (missing, malformed JSON, scalar) counts as "no arguments"
    $rawArguments = $toolCall['function']['arguments'] ?? null;
    $arguments = is_string($rawArguments) ? json_decode($rawArguments, true) : $rawArguments;
    if (!is_array($arguments)) {
        $arguments = [];
    }

    // Create a cache key from the tool name and arguments
    $cacheKey = md5($toolName . serialize($arguments));

    // Check if we have a cached result for this tool call
    if (isset($this->toolCallCache[$cacheKey])) {
        $toolResponse = $this->toolCallCache[$cacheKey];
        // The cached entry has no call id; attach the current one
        $toolResponse['tool_call_id'] = $toolCall['id'];
        $toolResponse['cached'] = true; // Indicate this response was cached
        return $toolResponse;
    }

    $toolResponse = [
        'role' => 'tool',
        'tool_call_id' => $toolCall['id'],
        'cached' => false // Indicate this is a fresh response
    ];

    switch ($toolName) {
        case 'get_document':
            $documentId = $arguments['id'] ?? '';
            if (!is_string($documentId) || $documentId === '') {
                // Tell the LLM what is wrong instead of looking up an empty id
                $toolResponse['content'] = 'Missing required parameter: id';
                break;
            }
            $content = $this->getPageContent($documentId);
            if ($content === false) {
                $toolResponse['content'] = 'Document not found: ' . $documentId;
            } else {
                $toolResponse['content'] = $content;
            }
            break;

        case 'get_template':
            // Get template content using the convenience function
            $toolResponse['content'] = $this->getTemplateContent();
            break;

        case 'get_examples':
            // Keep the requested count inside the range given in the tool definition
            $count = isset($arguments['count']) ? (int)$arguments['count'] : 5;
            $count = max(1, min(20, $count));
            // Double quotes so that \n is a real line break, not a literal backslash-n
            $toolResponse['content'] = "<examples>\n" . $this->getSnippets($count) . "\n</examples>";
            break;

        default:
            $toolResponse['content'] = 'Unknown tool: ' . $toolName;
    }

    // Cache the result for future calls with the same parameters
    $cacheEntry = $toolResponse;
    // Remove tool_call_id and cached flag from cache as they change per call
    unset($cacheEntry['tool_call_id']);
    unset($cacheEntry['cached']);
    $this->toolCallCache[$cacheKey] = $cacheEntry;

    return $toolResponse;
}

/**
 * Make an API call with tool responses
 *
 * Sends a follow-up request to the LLM with tool responses.
 * Implements complex logic for handling tool calls with caching and loop protection.
 *
 * Complex logic includes:
 * 1. Making HTTP requests with proper authentication and error handling
 * 2. Processing tool calls from the LLM response
 * 3. Caching tool responses to avoid duplicate calls with identical parameters
 * 4. Tracking tool call counts to prevent infinite loops
 * 5. Implementing loop protection with call count limits
 * 6.
/**
     * Make an API call with tool responses
     *
     * Posts $data as JSON to the configured endpoint and returns the
     * assistant's final text. When the response carries tool calls, each call
     * is executed through handleToolCall(), the assistant message and the tool
     * results are appended to the conversation, and the request is repeated
     * until the model answers with plain content.
     *
     * Loop protection:
     * - per-tool call counts are kept in $this->toolCallCounts
     * - once any single tool was called more than 3 times, or more than 10
     *   tool calls were made in total, the follow-up request is sent without
     *   'tools' / 'tool_choice' so the model has to answer with content
     * - the counters are cleared when final content is returned
     *
     * @param array $data        The API request data including the messages
     * @param bool  $toolsCalled When true, tools are stripped from the request (loop protection)
     * @param bool  $useTools    Whether tool calls found in the response are processed
     * @return string The final response content (trimmed)
     * @throws \Exception On transport errors, non-200 responses or an unexpected response format
     */
    private function callAPIWithTools($data, $toolsCalled = false, $useTools = true)
    {
        // HTTP headers; the bearer token is only sent when an API key is configured
        $headers = ['Content-Type: application/json'];
        if (!empty($this->api_key)) {
            $headers[] = 'Authorization: Bearer ' . $this->api_key;
        }

        // Loop protection: without tool definitions the model cannot request more tool calls
        if ($toolsCalled) {
            unset($data['tools'], $data['tool_choice']);
        }

        // json_encode() returns false on failure (e.g. invalid UTF-8); do not post an empty body
        $payload = json_encode($data);
        if ($payload === false) {
            throw new \Exception('API request failed: ' . json_last_error_msg());
        }

        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $this->api_url,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $payload,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_SSL_VERIFYPEER => true,
        ]);

        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // Exception is fully qualified: this file lives in a namespace, so a bare
        // "Exception" would resolve to a class inside dokuwiki\plugin\dokullm.
        if ($error) {
            throw new \Exception('API request failed: ' . $error);
        }

        if ($httpCode !== 200) {
            throw new \Exception('API request failed with HTTP code: ' . $httpCode);
        }

        $result = json_decode($response, true);
        $message = [];
        if (isset($result['choices'][0]['message']) && is_array($result['choices'][0]['message'])) {
            $message = $result['choices'][0]['message'];
        }

        $toolCalls = [];
        if (isset($message['tool_calls']) && is_array($message['tool_calls'])) {
            $toolCalls = $message['tool_calls'];
        }

        // Tool calls are checked before content: a response may carry both (often an
        // empty-string content), and returning that content would skip the tools.
        // Tool calls are ignored once tools were stripped from the request.
        if ($useTools && !$toolsCalled && !empty($toolCalls)) {
            $messages = $data['messages'];

            // Echo the assistant message back with every field except 'content'
            $assistantMessage = $message;
            unset($assistantMessage['content']);
            $messages[] = $assistantMessage;

            foreach ($toolCalls as $toolCall) {
                $toolName = isset($toolCall['function']['name']) ? $toolCall['function']['name'] : '';
                if (!isset($this->toolCallCounts[$toolName])) {
                    $this->toolCallCounts[$toolName] = 0;
                }
                $this->toolCallCounts[$toolName]++;

                $messages[] = $this->handleToolCall($toolCall);
            }

            // Disable tools for the follow-up request when a limit has been exceeded
            $limitExceeded = max($this->toolCallCounts) > 3
                || array_sum($this->toolCallCounts) > 10;

            $data['messages'] = $messages;
            return $this->callAPIWithTools($data, $limitExceeded, $useTools);
        }

        if (isset($message['content'])) {
            // Final answer reached: start the next conversation with fresh counters
            $this->toolCallCounts = [];
            return trim($message['content']);
        }

        throw new \Exception('Unexpected API response format');
    }

    /**
     * Load a prompt template from a DokuWiki page and replace placeholders
     *
     * Prompt templates are read from pages with IDs in the format
     * dokullm:prompts:LANGUAGE:PROMPT_NAME
     *
     * Language fallback:
     * 1. The prompt is loaded in the configured language ('en' when the
     *    setting is empty or 'default')
     * 2. If that page cannot be read, the English prompt is tried
     * 3. An exception is thrown if neither page is available
     *
     * After loading, the prompt is scanned for {placeholders}. Placeholders
     * without a value in $variables are filled in: 'template', 'snippets',
     * 'examples' and 'previous' get generated content, anything else becomes
     * an empty string. Finally all variables are substituted.
     *
     * @param string $promptName The name of the prompt (e.g., 'create', 'rewrite')
     * @param array  $variables  Associative array of placeholder => value pairs
     * @return string The processed prompt with placeholders replaced
     * @throws \Exception If the prompt page cannot be loaded in any language
     */
    private function loadPrompt($promptName, $variables = [])
    {
        global $conf;

        $language = $conf['plugin']['dokullm']['language'] ?? '';
        if (empty($language) || $language === 'default') {
            $language = 'en';
        }

        // Configured language first
        $promptPageId = 'dokullm:prompts:' . $language . ':' . $promptName;
        $prompt = $this->getPageContent($promptPageId);

        // English fallback
        if ($prompt === false && $language !== 'en') {
            $promptPageId = 'dokullm:prompts:en:' . $promptName;
            $prompt = $this->getPageContent($promptPageId);
        }

        if ($prompt === false) {
            throw new \Exception('Prompt page not found: ' . $promptPageId);
        }

        // Fill in values for placeholders the caller did not provide
        foreach ($this->findPlaceholders($prompt) as $placeholder) {
            if (isset($variables[$placeholder])) {
                continue;
            }

            switch ($placeholder) {
                case 'template':
                    // Uses page_template when given, otherwise a suggested template
                    $variables[$placeholder] = $this->getTemplateContent($variables['page_template'] ?? null);
                    break;

                case 'snippets':
                    $variables[$placeholder] = $this->getSnippets(10);
                    break;

                case 'examples':
                    $variables[$placeholder] = $this->getExamplesContent($variables['page_examples'] ?? []);
                    break;

                case 'previous':
                    $previousId = $variables['page_previous'] ?? '';
                    $variables[$placeholder] = $this->getPreviousContent($previousId);

                    // The previous report is accompanied by both report dates
                    $variables['current_date'] = $this->getPageDate();
                    $variables['previous_date'] = !empty($previousId) ? $this->getPageDate($previousId) : '';
                    break;

                default:
                    // Unknown placeholders are blanked out
                    $variables[$placeholder] = '';
                    break;
            }
        }

        // Substitute {name} with its value. Only values with a string form are
        // substituted; list-type metadata such as page_examples is skipped.
        foreach ($variables as $placeholder => $value) {
            if (is_array($value) || (is_object($value) && !method_exists($value, '__toString'))) {
                continue;
            }
            $prompt = str_replace('{' . $placeholder . '}', (string) $value, $prompt);
        }

        return $prompt;
    }

    /**
     * Load system prompt with optional command-specific appendage
     *
     * Loads the 'system' prompt and, when an action is given, appends the
     * prompt named ACTION:system separated by a newline. A missing or
     * unloadable command-specific prompt is not an error.
     *
     * @param string $action    The action/command name
     * @param array  $variables Associative array of placeholder => value pairs
     * @return string The combined system prompt
     * @throws \Exception If the main system prompt cannot be loaded
     */
    private function loadSystemPrompt($action, $variables = [])
    {
        $systemPrompt = $this->loadPrompt('system', $variables);

        if (empty($action)) {
            return $systemPrompt;
        }

        try {
            $systemPrompt .= "\n" . $this->loadPrompt($action . ':system', $variables);
        } catch (\Exception $e) {
            // The command-specific prompt is optional; keep the main system prompt
        }

        return $systemPrompt;
    }

    /**
     * Get the content of a DokuWiki page
     *
     * Resolves the page ID to its file with wikiFN() and returns the raw file
     * content.
     *
     * @param string $pageId The page ID to retrieve
     * @return string|false The page content or false if the file is missing or unreadable
     */
    public function getPageContent($pageId)
    {
        $pageFile = wikiFN($pageId);

        if (!file_exists($pageFile) || !is_readable($pageFile)) {
            return false;
        }

        return file_get_contents($pageFile);
    }

    /**
     * Extract date from page ID or file timestamp
     *
     * Looks for a date in YYmmdd format in the page ID. Only standalone groups
     * of exactly six digits that form a valid calendar date are accepted.
     * Otherwise the modification time of the page file is used.
     *
     * @param string $pageId Optional page ID to extract date from (defaults to current page)
     * @return string Formatted date string (YYYY-MM-DD) or empty string if no date can be determined
     */
    private function getPageDate($pageId = null)
    {
        global $ID;

        $targetPageId = $pageId ?: $ID;
        if (empty($targetPageId)) {
            return '';
        }

        // The lookarounds keep the pattern from matching inside longer digit runs
        // (e.g. an eight-digit YYYYmmdd would otherwise yield a bogus date)
        if (preg_match_all('/(?<!\d)(\d{2})(\d{2})(\d{2})(?!\d)/', $targetPageId, $candidates, PREG_SET_ORDER)) {
            foreach ($candidates as $candidate) {
                // Years 00-69 are read as 20xx, years 70-99 as 19xx
                $fullYear = (intval($candidate[1]) <= 69 ? '20' : '19') . $candidate[1];

                if (checkdate(intval($candidate[2]), intval($candidate[3]), intval($fullYear))) {
                    return $fullYear . '-' . $candidate[2] . '-' . $candidate[3];
                }
            }
        }

        // Fallback to file timestamp
        $pageFile = wikiFN($targetPageId);
        if (file_exists($pageFile)) {
            $timestamp = filemtime($pageFile);
            if ($timestamp !== false) {
                return date('Y-m-d', $timestamp);
            }
        }

        return '';
    }

    /**
     * Get current text
     *
     * Returns the value of $this->currentText.
     *
     * @return string The current text
     */
    private function getCurrentText()
    {
        return $this->currentText;
    }

    /**
     * Scan text for placeholders
     *
     * Finds all placeholders in the format {placeholder_name} in the provided
     * text. Any run of characters other than '}' between braces counts as a
     * placeholder name.
     * NOTE(review): literal braces in a prompt (e.g. a JSON sample) are matched
     * too - confirm whether prompts can contain them.
     *
     * @param string $text The text to scan for placeholders
     * @return array List of unique placeholder names in order of first appearance
     */
    public function findPlaceholders($text)
    {
        if (!preg_match_all('/\{([^}]+)\}/', $text, $matches)) {
            return [];
        }

        // array_unique() keeps the original keys; re-index to return a plain list
        return array_values(array_unique($matches[1]));
    }

    /**
     * Get template content for the current text
     *
     * If a page ID is provided and readable, its content is returned.
     * Otherwise queryChromaDBTemplate() is asked for a template matching the
     * current text and the content of the first hit is returned.
     *
     * @param string|null $pageId Optional page ID to retrieve template from directly
     * @return string The template content or '( no template )' if none could be loaded
     */
    private function getTemplateContent($pageId = null)
    {
        // Explicit template page
        if ($pageId !== null && $pageId !== '') {
            $templateContent = $this->getPageContent($pageId);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        // Template suggested for the current text
        $suggestedIds = $this->queryChromaDBTemplate($this->getCurrentText());
        if (!empty($suggestedIds)) {
            $templateContent = $this->getPageContent($suggestedIds[0]);
            if ($templateContent !== false) {
                return $templateContent;
            }
        }

        return '( no template )';
    }

    /**
     * Get snippets content for the current text
     *
     * Fetches snippets for the current text through queryChromaDBSnippets()
     * and wraps each one in an <example id="N"> element, numbered from 1.
     *
     * @param int $count Number of snippets to retrieve (default: 10)
     * @return string Formatted snippets or '( no examples )' if none were found
     */
    private function getSnippets($count = 10)
    {
        $snippets = $this->queryChromaDBSnippets($this->getCurrentText(), $count);
        if (empty($snippets)) {
            return '( no examples )';
        }

        $formattedSnippets = [];
        $number = 0;
        foreach ($snippets as $snippet) {
            $number++;
            // Double-quoted "\n": single quotes would emit a literal backslash-n
            $formattedSnippets[] = '<example id="' . $number . '">' . "\n" . $snippet . "\n" . '</example>';
        }

        return implode("\n", $formattedSnippets);
    }

    /**
     * Get examples content from example page IDs
     *
     * Returns the content of every readable example page wrapped in an
     * <example_page source="ID"> element. Unreadable pages are skipped.
     *
     * @param array $exampleIds List of example page IDs
     * @return string Formatted examples or '( no examples )' if no page could be loaded
     */
    private function getExamplesContent($exampleIds = [])
    {
        if (empty($exampleIds) || !is_array($exampleIds)) {
            return '( no examples )';
        }

        $examplesContent = [];
        foreach ($exampleIds as $exampleId) {
            $content = $this->getPageContent($exampleId);
            if ($content === false) {
                continue;
            }
            // Double-quoted "\n": single quotes would emit a literal backslash-n
            $examplesContent[] = '<example_page source="' . $exampleId . '">' . "\n"
                . $content . "\n" . '</example_page>';
        }

        // Same marker as for an empty list when none of the pages was readable
        if (empty($examplesContent)) {
            return '( no examples )';
        }

        return implode("\n", $examplesContent);
    }

    /**
     * Get previous report content from previous page ID
     *
     * @param string $previousId Previous page ID
     * @return string The page content, '( no previous report )' when no ID is given,
     *                or '( previous report not found )' when the page cannot be read
     */
    private function getPreviousContent($previousId = '')
    {
        if (empty($previousId)) {
            return '( no previous report )';
        }

        $content = $this->getPageContent($previousId);

        return $content !== false ? $content : '( previous report not found )';
    }

    /**
     * Get ChromaDB client with configuration
     *
     * Builds a ChromaDBClient from the CHROMA_* constants (read after including
     * config.php), with built-in defaults for constants that are not defined.
     * The collection name is the first part of the current page ID; the
     * default collection is used when there is no page ID or the first part
     * is 'playground'.
     *
     * @return array Array containing the ChromaDB client and collection name
     */
    private function getChromaDBClient()
    {
        global $ID;

        // config.php may define the CHROMA_* constants
        require_once 'config.php';

        $chromaHost = defined('CHROMA_HOST') ? CHROMA_HOST : 'localhost';
        $chromaPort = defined('CHROMA_PORT') ? CHROMA_PORT : 8000;
        $chromaTenant = defined('CHROMA_TENANT') ? CHROMA_TENANT : 'dokullm';
        $chromaDatabase = defined('CHROMA_DATABASE') ? CHROMA_DATABASE : 'dokullm';
        $chromaDefaultCollection = defined('CHROMA_COLLECTION') ? CHROMA_COLLECTION : 'documents';

        $chromaCollection = $chromaDefaultCollection;
        if (!empty($ID)) {
            // First part of the page ID (text before the first ':')
            $firstPart = explode(':', $ID)[0];
            if (!empty($firstPart) && $firstPart !== 'playground') {
                $chromaCollection = $firstPart;
            }
        }

        $chromaClient = new \dokuwiki\plugin\dokullm\ChromaDBClient(
            $chromaHost,
            $chromaPort,
            $chromaTenant,
            $chromaDatabase
        );

        return [$chromaClient, $chromaCollection];
    }

    /**
     * Query ChromaDB for relevant documents
     *
     * Passes the text to ChromaDBClient::queryCollection() for the collection
     * chosen by getChromaDBClient() and returns the IDs of the matches.
     * Failures are logged and reported as an empty result.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 5)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of document IDs
     */
    private function queryChromaDB($text, $limit = 5, $where = null)
    {
        try {
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // The IDs for the single query text are in the first result row
            if (isset($results['ids'][0]) && is_array($results['ids'][0])) {
                return array_values($results['ids'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for relevant documents and return text snippets
     *
     * Same query as queryChromaDB(), but returns the document texts of the
     * matches instead of their IDs. Failures are logged and reported as an
     * empty result.
     *
     * @param string     $text  The text to find similar documents for
     * @param int        $limit Maximum number of documents to retrieve (default: 10)
     * @param array|null $where Optional filter conditions for metadata
     * @return array List of text snippets
     */
    private function queryChromaDBSnippets($text, $limit = 10, $where = null)
    {
        try {
            list($chromaClient, $chromaCollection) = $this->getChromaDBClient();
            $results = $chromaClient->queryCollection($chromaCollection, [$text], $limit, $where);

            // The documents for the single query text are in the first result row
            if (isset($results['documents'][0]) && is_array($results['documents'][0])) {
                return array_values($results['documents'][0]);
            }

            return [];
        } catch (\Exception $e) {
            // Log error but don't fail the operation
            error_log('ChromaDB query failed: ' . $e->getMessage());
            return [];
        }
    }

    /**
     * Query ChromaDB for a template document
     *
     * Runs queryChromaDB() with a limit of 1 and the metadata filter
     * type = 'template'. A trailing chunk suffix (e.g. "@2") is removed from
     * the returned ID to get the base document ID.
     *
     * @param string $text The text to find a template for
     * @return array List of template document IDs (maximum 1)
     */
    public function queryChromaDBTemplate($text)
    {
        $templateIds = $this->queryChromaDB($text, 1, ['type' => 'template']);

        if (!empty($templateIds)) {
            $templateIds[0] = preg_replace('/@\\d+$/', '', $templateIds[0]);
        }

        return $templateIds;
    }

}