1<?php
2
3namespace dokuwiki\plugin\aichat\Storage;
4
5use dokuwiki\HTTP\DokuHTTPClient;
6use dokuwiki\plugin\aichat\Chunk;
7
/**
 * Implements the storage backend using a Qdrant vector database server
 *
 * All communication with the server happens through its HTTP API using JSON payloads.
 */
class QdrantStorage extends AbstractStorage
{
    /** @var string URL to the qdrant server instance (without leading/trailing slashes) */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it has been verified or created, empty until then */
    protected $collection = '';

    /** @var string the configured collection name */
    protected $collectionName = '';


    /** @inheritdoc */
    public function __construct(array $config)
    {

        $this->baseurl = trim($config['qdrant_baseurl'] ?? '', '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key header is only sent when a key has been configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * Every request is sent with the `wait=true` query parameter. When the server returns an empty
     * body or a body that is not valid JSON, the request is repeated up to three more times with an
     * increasing pause (1s, 2s, 3s) before giving up.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @param int $retry Number of retries already done, used internally when the request is repeated
     * @return mixed The `result` member of the decoded response if there is one, the whole decoded response otherwise
     * @throws \Exception when no usable response was received or the HTTP status is not 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST', $retry = 0)
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        // an empty PHP array would be encoded as a JSON list, but an empty JSON object is sent instead
        if ($data === []) {
            $json = '{}';
        } else {
            $json = json_encode($data, JSON_THROW_ON_ERROR);
        }

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            if ($retry < 3) {
                sleep(1 + $retry);
                return $this->runQuery($endpoint, $data, $method, $retry + 1);
            }

            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\Exception $e) {
            if ($retry < 3) {
                sleep(1 + $retry);
                return $this->runQuery($endpoint, $data, $method, $retry + 1);
            }

            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            // prefer the error message from the response body, fall back to the HTTP client's error
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet and a dimension count was given. The
     * result is remembered, so the server is only asked once per instance.
     *
     * @param int $createWithDimensions if > 0, the collection will be created with this many dimensions
     * @return string
     * @throws \Exception when the collection can not be accessed and is not (or can not be) created
     */
    public function getCollection($createWithDimensions = 0)
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception $e) {
            // any failure of the lookup is treated as "collection missing"
            if (!$createWithDimensions) throw $e;
        }

        // still here? create the collection
        $data = [
            'vectors' => [
                'size' => $createWithDimensions,
                'distance' => 'Cosine',
            ]
        ];

        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        try {
            $collection = $this->getCollection();
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            $this->collection = '';
        } catch (\Exception) {
            // most likely there is no such collection; note that a failed deletion is ignored here, too
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point (or the collection could not be accessed)
            return null;
        }

        return $this->pointToChunk($data);
    }


    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        try {
            $collection = $this->getCollection();
        } catch (\Exception) {
            // no such collection
            return;
        }

        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $collection . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID)
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        // without chunks there is nothing to store and no embedding to derive the dimensions from
        if (!$chunks) return;

        $points = [];
        $dimensions = 0;
        foreach ($chunks as $chunk) {
            $embedding = $chunk->getEmbedding();
            // as before, the most recently seen embedding defines the dimensions of a new collection
            $dimensions = count($embedding);

            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $embedding,
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage()
                ]
            ];
        }

        $this->runQuery(
            '/collections/' . $this->getCollection($dimensions) . '/points',
            [
                'points' => $points
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        // restrict the search to chunks of the given language, if any
        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang
                        ],
                    ]
                ]
            ];
        } else {
            $filter = null;
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {

        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): newer Qdrant releases may no longer report vectors_count, so the
            // points_count is used as a fallback - confirm against the targeted server version
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? null,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }

    /**
     * Build the list of point IDs that are addressed for a single page
     *
     * @param int $firstChunkID the first chunk ID of the page
     * @return int[] 100 consecutive IDs starting at the given one
     */
    private function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + 99, 1);
    }

    /**
     * Create a Chunk from a point record as returned by the API
     *
     * @param array $point point data with `id`, `vector` and `payload`, optionally with a `score`
     * @return Chunk
     */
    private function pointToChunk($point)
    {
        $args = [
            $point['payload']['page'],
            (int)$point['id'],
            $point['payload']['text'],
            $point['vector'],
            $point['payload']['language'] ?? '',
            (int)$point['payload']['created'],
        ];

        // only search results carry a score; otherwise the Chunk's own default applies
        if (array_key_exists('score', $point)) {
            $args[] = $point['score'];
        }

        return new Chunk(...$args);
    }
}
329