<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Implements the storage backend using a Qdrant server
 *
 * All communication goes through the server's HTTP/JSON API using the
 * DokuWiki HTTP client. Chunks are stored as points in a single collection,
 * with page, text, creation time and language kept in the point payload.
 */
class QdrantStorage extends AbstractStorage
{
    /** @var string URL to the qdrant server instance */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it has been verified or created, empty before that */
    protected $collection = '';

    /** @var string the configured collection name */
    protected $collectionName = '';

    /** @inheritdoc */
    public function __construct(array $config)
    {
        $this->baseurl = trim($config['qdrant_baseurl'] ?? '', '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key header is only sent when a key has been configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The decoded 'result' member of the response is returned when present,
     * otherwise the whole decoded response.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed
     * @throws \Exception on empty responses, invalid JSON or any HTTP status other than 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        if ($data === []) {
            // json_encode() would turn an empty array into "[]", send an empty object instead
            $json = '{}';
        } else {
            $json = json_encode($data, JSON_THROW_ON_ERROR);
        }

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\Exception $e) {
            // keep the decoder's exception as previous so the parse error is not lost
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            // prefer the error message from the response body, fall back to the HTTP client's error
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet and a dimension count is given.
     * The name is cached after the first successful lookup or creation.
     *
     * @param int $createWithDimensions if > 0, the collection will be created with this many dimensions
     * @return string
     * @throws \Exception when the collection can not be accessed and is not to be created, or creation fails
     */
    public function getCollection($createWithDimensions = 0)
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception $e) {
            // without a dimension count we can not create the collection, so pass the error on
            if (!$createWithDimensions) throw $e;
        }

        // still here? create the collection
        $data = [
            'vectors' => [
                'size' => $createWithDimensions,
                'distance' => 'Cosine',
            ],
        ];

        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        try {
            $collection = $this->getCollection();
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            // forget the cached name so the next getCollection() call checks the server again
            $this->collection = '';
        } catch (\Exception) {
            // no such collection
        }
    }

    /**
     * @inheritdoc
     *
     * Any failure of the lookup (not only a missing point) results in null.
     */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return new Chunk(
            $data['payload']['page'],
            (int)$data['id'],
            $data['payload']['text'],
            $data['vector'],
            $data['payload']['language'] ?? '',
            (int)$data['payload']['created']
        );
    }

    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        try {
            $collection = $this->getCollection();
        } catch (\Exception) {
            // no such collection
            return;
        }

        // delete all possible chunk IDs: the 100 consecutive IDs starting at the first one
        // NOTE(review): presumably a page never owns more than 100 chunk IDs - confirm against the caller
        $ids = range($firstChunkID, $firstChunkID + 99, 1);

        $this->runQuery(
            '/collections/' . $collection . '/points/delete',
            [
                'points' => $ids,
            ],
            'POST'
        );
    }

    /**
     * @inheritdoc
     *
     * The collection is created on demand, sized by the embedding length of the given chunks.
     * Nothing is sent when no chunks are given.
     */
    public function addPageChunks($chunks)
    {
        $points = [];
        $dimensions = 0;
        foreach ($chunks as $chunk) {
            $embedding = $chunk->getEmbedding();
            // remembered here so the collection size does not rely on the loop variable after the loop
            $dimensions = count($embedding);

            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $embedding,
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage(),
                ],
            ];
        }

        // nothing to store, and no embedding to derive the collection dimensions from
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection($dimensions) . '/points',
            [
                'points' => $points,
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        // request the 100 consecutive IDs starting at the first one
        $ids = range($firstChunkID, $firstChunkID + 99, 1);

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $ids,
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = new Chunk(
                $point['payload']['page'],
                (int)$point['id'],
                $point['payload']['text'],
                $point['vector'],
                $point['payload']['language'] ?? '',
                (int)$point['payload']['created']
            );
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        if ($lang) {
            // restrict the search to points whose payload language matches
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang,
                        ],
                    ],
                ],
            ];
        } else {
            $filter = null;
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = new Chunk(
                $point['payload']['page'],
                (int)$point['id'],
                $point['payload']['text'],
                $point['vector'],
                $point['payload']['language'] ?? '',
                (int)$point['payload']['created'],
                $point['score']
            );
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // fall back to points_count when the server does not report vectors_count
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? null,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }
}