xref: /plugin/aichat/Storage/QdrantStorage.php (revision 8502e301c39a7a0971370cb9448b84e60a1c8a86)
14c0099a8SAndreas Gohr<?php
24c0099a8SAndreas Gohr
34c0099a8SAndreas Gohrnamespace dokuwiki\plugin\aichat\Storage;
44c0099a8SAndreas Gohr
54c0099a8SAndreas Gohruse dokuwiki\HTTP\DokuHTTPClient;
64c0099a8SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
74c0099a8SAndreas Gohr
/**
 * Implements the storage backend using a Qdrant vector database accessed via its HTTP API
 */
class QdrantStorage extends AbstractStorage
{
    /** @var int number of additional attempts made when a request yields no usable response */
    private const MAX_RETRIES = 3;

    /** @var int number of consecutive point IDs, starting at a page's first chunk ID, addressed per page */
    private const PAGE_ID_RANGE = 100;

    /** @var string URL to the qdrant server instance */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it is known to exist, empty until then */
    protected $collection = '';

    /** @var string the configured collection name */
    protected $collectionName = '';

    /** @inheritdoc */
    public function __construct(array $config)
    {
        // surrounding slashes are stripped so endpoints can be appended with a single '/'
        $this->baseurl = trim($config['qdrant_baseurl'] ?? '', '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key header is only sent when a key has been configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The request is repeated with an increasing pause (1s, 2s, 3s) when the server sends an empty body
     * or a body that is not valid JSON. A well-formed response with a non-200 status is not retried.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @param int $retry number of attempts that have already been made (used for the retry back-off)
     * @return mixed the 'result' member of the decoded response, or the whole decoded response if there is none
     * @throws \Exception when no valid response could be obtained or the API reported an error
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST', $retry = 0)
    {
        $url = $this->baseurl . '/' . trim($endpoint, '/') . '?wait=true';

        // an empty PHP array would be encoded as a JSON list, the API is sent an empty object instead
        $json = ($data === []) ? '{}' : json_encode($data, JSON_THROW_ON_ERROR);

        for ($attempt = (int)$retry; ; $attempt++) {
            $this->http->sendRequest($url, $json, $method);
            $response = $this->http->resp_body;
            $mayRetry = $attempt < self::MAX_RETRIES;

            if (!$response) {
                if ($mayRetry) {
                    sleep(1 + $attempt);
                    continue;
                }

                throw new \Exception(
                    'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
                );
            }

            try {
                $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
            } catch (\Exception $e) {
                if ($mayRetry) {
                    sleep(1 + $attempt);
                    continue;
                }

                throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
            }

            break; // a decodable response was received
        }

        if ((int)$this->http->status !== 200) {
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * The collection is looked up on the server on first use and the result is remembered for
     * subsequent calls. When the lookup fails and a dimension count was given, the collection is created.
     *
     * @param int $createWithDimensions if > 0, the collection will be created with this many dimensions
     * @return string
     * @throws \Exception when the lookup fails and no creation was requested, or when the creation fails
     */
    public function getCollection($createWithDimensions = 0)
    {
        if ($this->collection) return $this->collection;

        $endpoint = '/collections/' . $this->collectionName;

        try {
            $this->runQuery($endpoint, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception $e) {
            if (!$createWithDimensions) throw $e;
        }

        // still here? create the collection
        $this->runQuery(
            $endpoint,
            [
                'vectors' => [
                    'size' => $createWithDimensions,
                    'distance' => 'Cosine',
                ],
            ],
            'PUT'
        );
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        try {
            $collection = $this->getCollection();
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            $this->collection = ''; // forces a fresh lookup (and re-creation) on next use
        } catch (\Exception) {
            // no such collection
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->pointToChunk($data);
    }

    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        try {
            $collection = $this->getCollection();
        } catch (\Exception) {
            // no such collection
            return;
        }

        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $collection . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID),
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        $dimensions = 0;
        foreach ($chunks as $chunk) {
            $embedding = $chunk->getEmbedding();
            // as before, the embedding of the last chunk determines the size of a newly created collection
            $dimensions = count($embedding);

            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $embedding,
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage(),
                ],
            ];
        }

        // nothing to store; without any chunk there is also no embedding to size a new collection with
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection($dimensions) . '/points',
            [
                'points' => $points,
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        // restrict the search to chunks of the given language, if one was requested
        $filter = null;
        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang,
                        ],
                    ],
                ],
            ];
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point, true);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): 'vectors_count' is believed to be deprecated in newer Qdrant releases - confirm
            // against the API reference. 'points_count' is only used when 'vectors_count' is absent.
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? null,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }

    /**
     * List the point IDs that may belong to a page
     *
     * @param int $firstChunkID the ID of the page's first chunk
     * @return int[] the consecutive IDs starting at $firstChunkID
     */
    private function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + self::PAGE_ID_RANGE - 1, 1);
    }

    /**
     * Convert a point as returned by the API into a Chunk
     *
     * Reads the 'id', 'vector' and 'payload' (page, text, created and the optional language) members.
     *
     * @param array $point the decoded point data
     * @param bool $withScore whether the point's 'score' member should be passed on to the Chunk
     * @return Chunk
     */
    private function pointToChunk(array $point, bool $withScore = false)
    {
        $payload = $point['payload'];

        if ($withScore) {
            return new Chunk(
                $payload['page'],
                (int)$point['id'],
                $payload['text'],
                $point['vector'],
                $payload['language'] ?? '',
                (int)$payload['created'],
                $point['score']
            );
        }

        return new Chunk(
            $payload['page'],
            (int)$point['id'],
            $payload['text'],
            $point['vector'],
            $payload['language'] ?? '',
            (int)$payload['created']
        );
    }
}
329