<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Implements the storage backend using a Qdrant server instance
 */
class QdrantStorage extends AbstractStorage
{
    /** @var int how often a request is repeated when no usable response was received */
    private const MAX_RETRIES = 3;

    /** @var int number of consecutive chunk IDs, starting at a page's first chunk ID, addressed per page */
    private const CHUNK_ID_RANGE = 100;

    /** @var string URL to the qdrant server instance */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it is known to exist, empty before that */
    protected $collection = '';

    /** @var string the configured collection name */
    protected $collectionName = '';

    /** @inheritdoc */
    public function __construct(array $config)
    {
        $this->baseurl = trim($config['qdrant_baseurl'] ?? '', '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key header is only sent when a key has been configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * When the server sends an empty body or a body that is not valid JSON, the request is
     * repeated (with an increasing pause in between) before giving up. Responses carrying a
     * non-200 status are not retried.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @param int $retry number of retries already done; used internally, callers leave it at 0
     * @return mixed the "result" member of the decoded response, or the whole decoded response if there is none
     * @throws \Exception
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST', $retry = 0)
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        // json_encode() would turn an empty array into "[]", send an empty JSON object instead
        $json = ($data === []) ? '{}' : json_encode($data, JSON_THROW_ON_ERROR);

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        // figure out if we got something usable, remember the failure otherwise
        $result = null;
        $failure = null;
        if (!$response) {
            $failure = new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        } else {
            try {
                $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
            } catch (\Exception $e) {
                $failure = new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
            }
        }

        if ($failure !== null) {
            if ($retry < self::MAX_RETRIES) {
                // back off a little longer with each attempt
                sleep(1 + $retry);
                return $this->runQuery($endpoint, $data, $method, $retry + 1);
            }
            throw $failure;
        }

        if ((int)$this->http->status !== 200) {
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet
     *
     * @param int $createWithDimensions if > 0, the collection will be created with this many dimensions
     * @return string
     * @throws \Exception
     */
    public function getCollection($createWithDimensions = 0)
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception $e) {
            // without dimensions we are not allowed to create the collection
            if (!$createWithDimensions) throw $e;
        }

        // still here? create the collection
        $data = [
            'vectors' => [
                'size' => $createWithDimensions,
                'distance' => 'Cosine',
            ]
        ];
        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        try {
            $collection = $this->getCollection();
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            $this->collection = '';
        } catch (\Exception) {
            // no such collection
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->chunkFromPoint($data);
    }

    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        try {
            $collection = $this->getCollection();
        } catch (\Exception) {
            // no such collection
            return;
        }

        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $collection . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID)
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        $dimensions = 0;
        foreach ($chunks as $chunk) {
            $embedding = $chunk->getEmbedding();
            $dimensions = count($embedding);
            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $embedding,
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage()
                ]
            ];
        }

        // nothing to store; also avoids touching a chunk that does not exist
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection($dimensions) . '/points',
            [
                'points' => $points
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        // restrict the search to the given language, if any
        $filter = null;
        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang
                        ],
                    ]
                ]
            ];
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point, true);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): vectors_count appears to be deprecated in newer Qdrant releases;
            // points_count is used as a fallback when it is missing - confirm against the server in use
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? null,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }

    /**
     * List all chunk IDs that are addressed for a page
     *
     * @param int $firstChunkID the first chunk ID of the page
     * @return int[] consecutive IDs starting at the given one
     */
    private function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + self::CHUNK_ID_RANGE - 1, 1);
    }

    /**
     * Create a Chunk from a point as returned by the API
     *
     * @param array $point decoded point with "id", "vector" and "payload" members
     * @param bool $withScore also pass the point's "score" member on to the Chunk
     * @return Chunk
     */
    private function chunkFromPoint(array $point, $withScore = false)
    {
        $args = [
            $point['payload']['page'],
            (int)$point['id'],
            $point['payload']['text'],
            $point['vector'],
            $point['payload']['language'] ?? '',
            (int)$point['payload']['created'],
        ];
        if ($withScore) {
            $args[] = $point['score'];
        }

        return new Chunk(...$args);
    }
}