xref: /plugin/aichat/Storage/QdrantStorage.php (revision 4c0099a889ba3b789f9c81b3cd963aadb567ea68)
1*4c0099a8SAndreas Gohr<?php
2*4c0099a8SAndreas Gohr
3*4c0099a8SAndreas Gohrnamespace dokuwiki\plugin\aichat\Storage;
4*4c0099a8SAndreas Gohr
5*4c0099a8SAndreas Gohruse dokuwiki\HTTP\DokuHTTPClient;
6*4c0099a8SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
7*4c0099a8SAndreas Gohr
/**
 * Implements the storage backend using a Qdrant vector database server
 *
 * All communication goes through the Qdrant REST API using DokuWiki's HTTP client. The configured
 * collection is looked up, and created when the lookup fails, the first time it is needed.
 */
class QdrantStorage extends AbstractStorage
{
    /** @var int Vector size used when a new collection is created. FIXME should not be hardcoded */
    protected const VECTOR_SIZE = 1536;

    /**
     * @var int Number of consecutive IDs, counted from a page's first chunk ID, that are treated
     *          as possibly belonging to that page
     */
    protected const CHUNKS_PER_PAGE = 100;

    /** @var string URL to the qdrant server instance */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string Name of the collection once it has been looked up or created, empty before that */
    protected $collection = '';

    /** @var string Name of the collection as configured */
    protected $collectionName = '';


    /**
     * QdrantStorage constructor.
     *
     * Reads the connection settings from the aichat helper plugin and prepares the HTTP client
     * for JSON requests. The API key header is only set when a key is configured.
     */
    public function __construct()
    {
        $helper = plugin_load('helper', 'aichat');

        $this->baseurl = $helper->getConf('qdrant_baseurl');
        $this->collectionName = $helper->getConf('qdrant_collection');

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        $apiKey = $helper->getConf('qdrant_apikey');
        if ($apiKey) {
            $this->http->headers['api-key'] = $apiKey;
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The request always carries the wait=true query parameter. Any HTTP status other than 200
     * is treated as an error.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed The 'result' member of the decoded response, or the whole decoded response
     *               when there is no such member
     * @throws \Exception when encoding fails, no response body is received, the response is not
     *                    valid JSON or the HTTP status is not 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        // strip trailing slashes from the configured base URL so the path never contains '//'
        $url = rtrim((string)$this->baseurl, '/') . '/' . $endpoint . '?wait=true';

        // an empty PHP array would be encoded as the JSON list [], send an empty object instead
        $json = ($data === []) ? '{}' : json_encode($data, JSON_THROW_ON_ERROR);

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            // keep the decoder's exception as the cause to ease debugging
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            // prefer the error message from the response body over the client's generic one
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet. The name is remembered, so the server
     * is only asked once per instance (until the collection is cleared again).
     *
     * Note: any failure of the lookup request is taken as "collection does not exist" and
     * followed by an attempt to create it.
     *
     * @return string
     * @throws \Exception when the collection can not be created
     */
    public function getCollection()
    {
        if ($this->collection) return $this->collection;

        $endpoint = '/collections/' . $this->collectionName;

        try {
            $this->runQuery($endpoint, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception) {
            // collection seems not to exist
        }

        // create the collection
        $this->runQuery(
            $endpoint,
            [
                'vectors' => [
                    'size' => self::VECTOR_SIZE,
                    'distance' => 'Cosine',
                ],
            ],
            'PUT'
        );
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /**
     * Build the list of chunk IDs that may belong to the page starting at the given chunk ID
     *
     * @param int $firstChunkID
     * @return int[]
     */
    private function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + self::CHUNKS_PER_PAGE - 1, 1);
    }

    /**
     * Create a Chunk object from a point as returned by the API
     *
     * @param array $point decoded point, needs the 'id', 'vector' and 'payload' members
     * @param bool $withScore also pass the point's 'score' member to the Chunk
     * @return Chunk
     */
    private function chunkFromPoint(array $point, $withScore = false)
    {
        $args = [
            $point['payload']['page'],
            (int)$point['id'],
            $point['payload']['text'],
            $point['vector'],
            $point['payload']['language'] ?? '',
            (int)$point['payload']['created'],
        ];
        if ($withScore) $args[] = $point['score'];

        return new Chunk(...$args);
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        // (getCollection() creates a missing collection first, so there is always one to delete)
        $collection = $this->getCollection();
        if ($collection) {
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            // forget the name, so the next access creates a fresh collection
            $this->collection = '';
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->chunkFromPoint($data);
    }


    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID),
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        foreach ($chunks as $chunk) {
            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $chunk->getEmbedding(),
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage(),
                ],
            ];
        }

        // nothing to store, so don't bother the server with an empty upsert request
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'points' => $points,
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        // ask for all possible chunk IDs of the page
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        // restrict the search to chunks in the given language, if any
        $filter = null;
        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang,
                        ],
                    ],
                ],
            ];
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point, true);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        // Members missing from a response are reported as null instead of raising warnings.
        // NOTE(review): newer Qdrant releases appear to drop the deprecated 'vectors_count',
        // so 'points_count' is used as a fallback - confirm against the server version in use.
        return [
            'qdrant_version' => $telemetry['app']['version'] ?? null,
            'vector_config' => $info['config']['params']['vectors'] ?? null,
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? null,
            'segments' => $info['segments_count'] ?? null,
            'status' => $info['status'] ?? null,
        ];
    }
}
311