<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Implements the storage backend using a Qdrant server
 *
 * All communication happens through Qdrant's JSON REST API via runQuery().
 */
class QdrantStorage extends AbstractStorage
{
    /** @var string URL to the qdrant server instance (without trailing slash) */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string collection name once getCollection() has verified or created it, empty before */
    protected $collection = '';

    /** @var string the configured collection name */
    protected $collectionName = '';


    /** @inheritdoc */
    public function __construct(array $config)
    {
        // strip trailing slashes, runQuery() adds its own separator before the endpoint
        $this->baseurl = rtrim($config['qdrant_baseurl'] ?? '', '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key header is only sent when a key has been configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The request is always sent with the wait=true query parameter.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed The 'result' member of the decoded response, or the whole decoded response if missing
     * @throws \Exception on empty responses, invalid JSON or any status code other than 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        if ($data === []) {
            // an empty PHP array would encode as a JSON list, an empty object is sent instead
            $json = '{}';
        } else {
            $json = json_encode($data, JSON_THROW_ON_ERROR);
        }

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\Exception $e) {
            // keep the decoding error available as the previous exception
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Create a Chunk object from a point as returned by the Qdrant API
     *
     * @param mixed $point decoded point with 'id', 'vector' and 'payload' members
     * @param bool $withScore pass the point's 'score' on to the Chunk (search results only)
     * @return Chunk
     */
    private function chunkFromPoint($point, $withScore = false)
    {
        $args = [
            $point['payload']['page'],
            (int)$point['id'],
            $point['payload']['text'],
            $point['vector'],
            $point['payload']['language'] ?? '',
            (int)$point['payload']['created'],
        ];

        // the score argument is only appended when requested, so other callers
        // keep relying on the Chunk constructor's own default
        if ($withScore) {
            $args[] = $point['score'];
        }

        return new Chunk(...$args);
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet
     *
     * @return string
     * @throws \Exception when the collection can not be created
     */
    public function getCollection()
    {
        if ($this->collection) {
            return $this->collection;
        }

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception) {
            // collection seems not to exist
            // note: any failure of the lookup ends up here, the creation below will then fail as well
        }

        $data = [
            'vectors' => [
                'size' => 1536, // FIXME should not be hardcoded
                'distance' => 'Cosine',
            ],
        ];

        // create the collection
        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) {
            return;
        }

        // if a collection exists, delete it
        // (getCollection() creates a missing collection first, so there is always one to delete)
        $collection = $this->getCollection();
        if ($collection) {
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            // reset the cached name so the next getCollection() call recreates the collection
            $this->collection = '';
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->chunkFromPoint($data);
    }


    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        // delete all possible chunk IDs (100 consecutive IDs starting at the first one)
        $ids = range($firstChunkID, $firstChunkID + 99, 1);

        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/delete',
            [
                'points' => $ids,
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        foreach ($chunks as $chunk) {
            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $chunk->getEmbedding(),
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage(),
                ],
            ];
        }

        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'points' => $points,
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        // request all possible chunk IDs of the page (100 consecutive IDs starting at the first one)
        $ids = range($firstChunkID, $firstChunkID + 99, 1);

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $ids,
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) {
            return [];
        }

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        if ($lang) {
            // restrict the search to chunks whose payload language matches
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang,
                        ],
                    ],
                ],
            ];
        } else {
            $filter = null;
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point, true);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): vectors_count appears to be missing or null on some Qdrant versions - confirm.
            // The points_count fallback avoids an undefined index in that case.
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? 0,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }
}