xref: /plugin/aichat/Storage/QdrantStorage.php (revision 04afb84f6cb8a0c9b1d4d807e18f90fe739ec371)
1<?php
2
3namespace dokuwiki\plugin\aichat\Storage;
4
5use dokuwiki\HTTP\DokuHTTPClient;
6use dokuwiki\plugin\aichat\Chunk;
7
/**
 * Implements the storage backend using a Qdrant vector database server
 *
 * All communication happens via Qdrant's JSON HTTP API. Chunks are stored as points: the chunk ID
 * is the point ID, the embedding is the point vector and page, text, language and creation time
 * are kept in the point payload.
 */
class QdrantStorage extends AbstractStorage
{
    /** @var string URL to the qdrant server instance (kept without a trailing slash) */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it has been verified or created, empty before that */
    protected $collection = '';

    /** @var string the collection name as given in the configuration */
    protected $collectionName = '';


    /** @inheritdoc */
    public function __construct(array $config)
    {
        // strip trailing slashes so runQuery() never produces a double slash in the URL
        $this->baseurl = rtrim((string)($config['qdrant_baseurl'] ?? ''), '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key is optional, the header is only sent when one is configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The request is always sent with the wait=true query parameter. The decoded 'result' member
     * of the response is returned when present, otherwise the whole decoded response.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed
     * @throws \Exception when the response is empty, is not valid JSON or the HTTP status is not 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        // json_encode([]) would produce "[]", an empty array is sent as an empty JSON object instead
        $json = ($data === []) ? '{}' : json_encode($data, JSON_THROW_ON_ERROR);

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            // keep the decoder's exception attached so the actual parse problem is not lost
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            // prefer the error message from the response body, fall back to the HTTP client's error
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet. The result is remembered, so the
     * server is only asked once per instance (until the collection is dropped in startCreation()).
     *
     * @return string
     * @throws \Exception when the collection could not be created
     */
    public function getCollection()
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception) {
            // collection seems not to exist
            // any failure of the lookup ends up here; a real server problem will surface in the PUT below
        }

        $data = [
            'vectors' => [
                'size' => 1536, // FIXME should not be hardcoded
                'distance' => 'Cosine',
            ]
        ];

        // create the collection
        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /**
     * Build the list of all chunk IDs a page may use
     *
     * @param int $firstChunkID the first chunk ID of the page
     * @return int[] the 100 consecutive IDs starting at $firstChunkID
     */
    private function possibleChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + 99, 1);
    }

    /**
     * Create a Chunk object from a point as returned by the Qdrant API
     *
     * @param array $point decoded point with 'id', 'vector' and 'payload' members
     * @param bool $withScore also pass the point's 'score' member on to the Chunk
     * @return Chunk
     */
    private function pointToChunk(array $point, $withScore = false)
    {
        $args = [
            $point['payload']['page'],
            (int)$point['id'],
            $point['payload']['text'],
            $point['vector'],
            $point['payload']['language'] ?? '', // language may be missing from the payload
            (int)$point['payload']['created'],
        ];
        if ($withScore) {
            $args[] = $point['score'];
        }

        return new Chunk(...$args);
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        $collection = $this->getCollection();
        if ($collection) {
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            // forget the cached name so the next getCollection() call creates the collection anew
            $this->collection = '';
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->pointToChunk($data);
    }


    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/delete',
            [
                'points' => $this->possibleChunkIds($firstChunkID)
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        foreach ($chunks as $chunk) {
            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $chunk->getEmbedding(),
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage()
                ]
            ];
        }

        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'points' => $points
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        // the page is not used for the lookup, all possible chunk IDs of the page are requested instead
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->possibleChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        // only restrict the search to a language when one was given
        $filter = null;
        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang
                        ],
                    ]
                ]
            ];
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        // same guard as in getPageChunks(): nothing to iterate over when there are no hits
        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point, true);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'] ?? null,
            'vector_config' => $info['config']['params']['vectors'] ?? null,
            // fall back to points_count when the server does not report vectors_count
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? null,
            'segments' => $info['segments_count'] ?? null,
            'status' => $info['status'] ?? null,
        ];
    }
}
308