xref: /plugin/aichat/Storage/QdrantStorage.php (revision 4c0099a889ba3b789f9c81b3cd963aadb567ea68)
1<?php
2
3namespace dokuwiki\plugin\aichat\Storage;
4
5use dokuwiki\HTTP\DokuHTTPClient;
6use dokuwiki\plugin\aichat\Chunk;
7
8/**
 * Implements the storage backend using a Qdrant server accessed via its HTTP API
10 */
class QdrantStorage extends AbstractStorage
{
    /** @var int vector size used when a collection has to be created and no embedding size is known */
    protected const DEFAULT_DIMENSIONS = 1536;

    /**
     * @var int number of consecutive point IDs, starting at a page's first chunk ID,
     *          that are addressed when reading or deleting the chunks of a page
     */
    protected const CHUNK_IDS_PER_PAGE = 100;

    /** @var string URL to the qdrant server instance */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it is known to exist, empty until then */
    protected $collection = '';

    /** @var string configured name of the collection */
    protected $collectionName = '';


    /**
     * QdrantStorage constructor.
     *
     * Reads the connection settings from the aichat helper plugin's configuration
     * and prepares a JSON speaking HTTP client.
     */
    public function __construct()
    {
        $helper = plugin_load('helper', 'aichat');

        $this->baseurl = $helper->getConf('qdrant_baseurl');
        $this->collectionName = $helper->getConf('qdrant_collection');

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key is optional, only send the header when one is configured
        $apikey = $helper->getConf('qdrant_apikey');
        if ($apikey) {
            $this->http->headers['api-key'] = $apikey;
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed the "result" member of the decoded response, or the whole decoded response if there is none
     * @throws \Exception on empty responses, undecodable JSON or any HTTP status other than 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        // trim both sides so a trailing slash in the configured base URL does not produce "//"
        $url = rtrim((string)$this->baseurl, '/') . '/' . trim($endpoint, '/') . '?wait=true';

        // an empty PHP array would be encoded as the JSON list "[]", send an empty object instead
        $json = ($data === []) ? '{}' : json_encode($data, JSON_THROW_ON_ERROR);

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\Exception $e) {
            // keep the decoder's exception attached so the root cause is not lost
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet. The name is remembered, so the
     * existence check is done only once per instance.
     *
     * @param int $createWithDimensions vector size to use if the collection has to be created;
     *                                  values below 1 fall back to the default size
     * @return string
     * @throws \Exception when the collection can neither be found nor created
     */
    public function getCollection($createWithDimensions = self::DEFAULT_DIMENSIONS)
    {
        if ($this->collection) return $this->collection;

        $endpoint = '/collections/' . $this->collectionName;

        try {
            $this->runQuery($endpoint, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception) {
            // Any failure is treated as "collection does not exist". Should the real cause be
            // something else (e.g. the server is unreachable), the PUT below will throw.
        }

        $size = (int)$createWithDimensions;
        if ($size < 1) $size = self::DEFAULT_DIMENSIONS;

        // create the collection
        $this->runQuery(
            $endpoint,
            [
                'vectors' => [
                    'size' => $size,
                    'distance' => 'Cosine',
                ]
            ],
            'PUT'
        );
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /**
     * Build the list of point IDs that are addressed for a page
     *
     * @param int $firstChunkID the first chunk ID of the page
     * @return int[]
     */
    protected function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + self::CHUNK_IDS_PER_PAGE - 1, 1);
    }

    /**
     * Convert a point as returned by the API into a Chunk
     *
     * @param array $point decoded point with 'id', 'vector' and 'payload' members
     * @param bool $withScore pass the point's 'score' member on to the Chunk
     * @return Chunk
     */
    protected function pointToChunk(array $point, $withScore = false)
    {
        $payload = $point['payload'];

        $args = [
            $payload['page'],
            (int)$point['id'],
            $payload['text'],
            $point['vector'],
            $payload['language'] ?? '',
            (int)$payload['created'],
        ];
        // only search results carry a score; otherwise the Chunk's own default applies
        if ($withScore) $args[] = $point['score'];

        return new Chunk(...$args);
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        $collection = $this->getCollection();
        if ($collection) {
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            // forget the cached name, the next access has to set the collection up again
            $this->collection = '';
        }
    }

    /**
     * @inheritdoc
     * @return Chunk|null null if the point could not be fetched
     */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->pointToChunk($data);
    }


    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID)
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        $dimensions = 0;
        foreach ($chunks as $chunk) {
            $embedding = $chunk->getEmbedding();

            // remember the size of the first embedding, it is used if the collection has to be created
            if (!$dimensions && is_array($embedding)) {
                $dimensions = count($embedding);
            }

            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $embedding,
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage()
                ]
            ];
        }

        // nothing to store, do not bother the server with an empty batch
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection($dimensions) . '/points',
            [
                'points' => $points
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        // restrict the search to chunks of the given language, if any
        $filter = null;
        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang
                        ],
                    ]
                ]
            ];
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point, true);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): vectors_count may be missing or null on newer Qdrant versions - confirm.
            // Each point is stored with a single vector here, so points_count is used as fallback.
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? 0,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }
}
311