xref: /plugin/aichat/Storage/QdrantStorage.php (revision 4a647d20a89c87bc2746312604c5608ee49b0923)
1<?php
2
3namespace dokuwiki\plugin\aichat\Storage;
4
5use dokuwiki\HTTP\DokuHTTPClient;
6use dokuwiki\plugin\aichat\Chunk;
7
/**
 * Implements the storage backend using a Qdrant vector database server
 *
 * All communication is done through the server's JSON HTTP API using the
 * DokuWiki HTTP client. Chunks are stored as points: the chunk ID is the point ID,
 * the embedding is the point's vector and page, text, creation time and language
 * are kept in the point's payload.
 */
class QdrantStorage extends AbstractStorage
{
    /** @var string URL to the qdrant server instance */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it has been verified or created, empty before that */
    protected $collection = '';

    /** @var string the configured collection name */
    protected $collectionName = '';


    /** @inheritdoc */
    public function __construct(array $config)
    {

        $this->baseurl = trim($config['qdrant_baseurl'] ?? '', '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key is optional, the header is only sent when one is configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The request always carries the `wait=true` query parameter. On success the
     * `result` member of the decoded response is returned, or the whole decoded
     * response when there is no such member.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed
     * @throws \Exception when the response is empty, is not valid JSON or the HTTP status is not 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        if ($data === []) {
            // an empty PHP array would be encoded as a JSON list, but an object is wanted here
            $json = '{}';
        } else {
            $json = json_encode($data, JSON_THROW_ON_ERROR);
        }

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\Exception $e) {
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            // prefer the error message from the response body over the client's own error
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet
     *
     * The result is remembered, so the server is only asked once per instance.
     *
     * @param int $createWithDimensions if > 0, the collection will be created with this many dimensions
     * @return string
     * @throws \Exception when the collection can not be accessed and is not (or can not be) created
     */
    public function getCollection($createWithDimensions = 0)
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception $e) {
            // without dimensions we can not create the collection, so the failure is final
            if (!$createWithDimensions) throw $e;
        }

        // still here? create the collection
        $data = [
            'vectors' => [
                'size' => $createWithDimensions,
                'distance' => 'Cosine',
            ]
        ];

        // create the collection
        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /**
     * Build the list of point IDs that may belong to a page
     *
     * Covers the 100 consecutive IDs starting at the given one. This presumably is the
     * maximum number of chunks a single page can have - the ID scheme itself is defined
     * outside of this class.
     *
     * @param int $firstChunkID the first chunk ID of the page
     * @return int[]
     */
    protected function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + 99, 1);
    }

    /**
     * Create a Chunk object from a point as returned by the Qdrant API
     *
     * @param array $point the point data, needs to contain the id, the vector and the payload
     * @param bool $withScore pass the point's similarity score on to the Chunk (search results only)
     * @return Chunk
     */
    protected function pointToChunk(array $point, $withScore = false)
    {
        $args = [
            $point['payload']['page'],
            (int)$point['id'],
            $point['payload']['text'],
            $point['vector'],
            $point['payload']['language'] ?? '',
            (int)$point['payload']['created'],
        ];

        // the score is only appended when requested, otherwise the Chunk's own default applies
        if ($withScore) {
            $args[] = $point['score'];
        }

        return new Chunk(...$args);
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        try {
            $collection = $this->getCollection();
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            // forget the collection, so it gets checked and recreated on next access
            $this->collection = '';
        } catch (\Exception) {
            // no such collection
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->pointToChunk($data);
    }


    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        try {
            $collection = $this->getCollection();
        } catch (\Exception) {
            // no such collection
            return;
        }

        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $collection . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID)
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        $dimensions = 0;
        foreach ($chunks as $chunk) {
            $embedding = $chunk->getEmbedding();
            // remembered so the collection can be created with the right vector size
            $dimensions = count($embedding);

            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $embedding,
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage()
                ]
            ];
        }

        // nothing to store: avoid a pointless request (and relying on an undefined loop variable)
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection($dimensions) . '/points',
            [
                'points' => $points
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        if ($lang) {
            // only consider points whose language payload matches
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang
                        ],
                    ]
                ]
            ];
        } else {
            $filter = null;
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point, true);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {

        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): vectors_count seems to be missing in the collection info of newer
            // Qdrant releases - fall back to points_count instead of failing. Confirm against API docs.
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? 0,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }
}
319