xref: /plugin/dokullm/conf/default.php (revision 0fdec17d4bba85e60fffb55d2c25a2af89940046)
<?php
/**
 * Default settings for the dokullm plugin
 *
 * This file defines the default configuration values for the LLM integration plugin.
 * These values can be overridden by the user in the plugin configuration.
 *
 * Every setting is a plain per-key assignment with a scalar value, one statement per key.
 */

/**
 * The API endpoint URL for the LLM service
 *
 * This should be the full URL to the chat completions endpoint of your LLM provider.
 * The default points at OpenAI's chat completions endpoint.
 *
 * @var string
 */
$conf['api_url'] = 'https://api.openai.com/v1/chat/completions';

/**
 * The API authentication key
 *
 * This is the secret key used to authenticate with the LLM service.
 * For security it is intentionally empty here and must be set by the user.
 *
 * @var string
 */
$conf['api_key'] = '';

/**
 * The model identifier to use for text processing
 *
 * Specifies which LLM model to use for processing requests.
 * The default is gpt-3.5-turbo, but it can be changed to other models such as gpt-4.
 *
 * @var string
 */
$conf['model'] = 'gpt-3.5-turbo';

/**
 * The request timeout in seconds
 *
 * Maximum time to wait for a response from the LLM API before timing out.
 * Set to 30 seconds by default, which should be sufficient for most requests.
 *
 * @var int
 */
$conf['timeout'] = 30;

/**
 * The language for prompts
 *
 * Specifies which language to use for the prompts.
 * 'default' uses English prompts, 'ro' uses Romanian prompts.
 *
 * @var string
 */
$conf['language'] = 'default';

/**
 * The temperature setting for the LLM
 *
 * Controls the randomness of the LLM output. Lower values (0.0-0.5) make the output
 * more deterministic and focused, while higher values (0.5-1.0) make it more random
 * and creative. Default is 0.3 for consistent, high-quality responses.
 *
 * @var float
 */
$conf['temperature'] = 0.3;

/**
 * The top-p (nucleus sampling) setting for the LLM
 *
 * Controls the cumulative probability of token selection. Lower values (0.1-0.5) make
 * the output more focused, while higher values (0.5-1.0) allow for more diverse outputs.
 * Default is 0.8 for a good balance between creativity and coherence.
 *
 * @var float
 */
$conf['top_p'] = 0.8;

/**
 * The top-k setting for the LLM
 *
 * Limits the number of highest probability tokens considered for each step.
 * Lower values (1-10) make the output more focused, while higher values (10-50)
 * allow for more diverse outputs. Default is 20 for balanced diversity.
 *
 * @var int
 */
$conf['top_k'] = 20;

/**
 * The min-p setting for the LLM
 *
 * Sets a minimum probability threshold for token selection. Tokens with probabilities
 * below this threshold are filtered out. Default is 0.0 (no filtering).
 *
 * @var float
 */
$conf['min_p'] = 0.0;

/**
 * Show copy button in the toolbar
 *
 * Controls whether the copy page button is displayed in the LLM toolbar.
 * When true, the copy button will be visible; when false, it will be hidden.
 *
 * @var bool
 */
$conf['show_copy_button'] = true;

/**
 * Replace ID in template content
 *
 * Controls whether the template page ID should be replaced with the new page ID
 * when copying a page with a template. When true, the template ID will be replaced;
 * when false, it will be left as is.
 *
 * @var bool
 */
$conf['replace_id'] = true;

/**
 * Enable thinking in LLM responses
 *
 * Controls whether the LLM should engage in deeper thinking processes before responding.
 * When true, the LLM will use thinking capabilities and may take longer to respond;
 * when false, it will provide direct responses without extended thinking.
 *
 * @var bool
 */
$conf['think'] = false;

/**
 * Enable tool usage in LLM responses
 *
 * Controls whether the LLM can use tools to enhance its responses.
 * When true, the LLM can call tools like get_document, get_template, and get_examples;
 * when false, these tools will not be available to the LLM.
 *
 * @var bool
 */
$conf['use_tools'] = false;

/**
 * ChromaDB Host
 *
 * The hostname or IP address of your ChromaDB server.
 * This is used for document storage and retrieval.
 *
 * NOTE(review): the shipped default is a private (RFC 1918) address that looks
 * specific to one deployment - set this to your own server and consider whether
 * a neutral default such as a loopback address would be more appropriate.
 *
 * @var string
 */
$conf['chroma_host'] = '10.200.8.16';

/**
 * ChromaDB Port
 *
 * The port number on which ChromaDB is running.
 * ChromaDB itself listens on port 8000 by default, but this plugin ships with 8087,
 * so adjust the value to match the port your server actually uses.
 *
 * @var int
 */
$conf['chroma_port'] = 8087;

/**
 * ChromaDB Tenant
 *
 * The tenant name for ChromaDB organization.
 * Used to isolate data between different organizations or projects.
 *
 * @var string
 */
$conf['chroma_tenant'] = 'dokullm';

/**
 * ChromaDB Database
 *
 * The database name within the ChromaDB tenant.
 * Used to organize collections within a tenant.
 *
 * @var string
 */
$conf['chroma_database'] = 'dokullm';

/**
 * ChromaDB Collection
 *
 * The default collection name for document storage.
 * Collections are used to group related documents.
 *
 * @var string
 */
$conf['chroma_collection'] = 'reports';

/**
 * Default Institution
 *
 * The default institution identifier for templates.
 * Used to determine which templates to use by default.
 *
 * NOTE(review): 'scuc' appears to be a site-specific identifier - confirm that it
 * is meaningful for other installations or override it locally.
 *
 * @var string
 */
$conf['default_institution'] = 'scuc';

/**
 * Ollama Host
 *
 * The hostname or IP address of your Ollama server.
 * Used for generating embeddings for document search.
 *
 * NOTE(review): same private (RFC 1918) address as the ChromaDB host default - it
 * looks deployment-specific, so set this to your own server.
 *
 * @var string
 */
$conf['ollama_host'] = '10.200.8.16';

/**
 * Ollama Port
 *
 * The port number on which Ollama is running.
 * Default Ollama port is 11434.
 *
 * @var int
 */
$conf['ollama_port'] = 11434;

/**
 * Ollama Embeddings Model
 *
 * The model name used for generating text embeddings.
 * Embeddings are used for semantic search in ChromaDB.
 *
 * @var string
 */
$conf['ollama_embeddings_model'] = 'nomic-embed-text';
234
235