<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Implements the storage backend using a Qdrant server accessed via its HTTP API
 */
class QdrantStorage extends AbstractStorage
{
    /**
     * @var int How many consecutive chunk IDs, counted from a page's first chunk ID,
     *          are addressed when the chunks of a page are fetched or deleted
     */
    protected const PAGE_CHUNK_RANGE = 100;

    /** @var string URL to the qdrant server instance */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string Name of the collection once it has been verified or created, empty before that */
    protected $collection = '';

    /** @var string Name of the collection as given in the configuration */
    protected $collectionName = '';


    /**
     * Read the connection settings from the configuration and prepare the HTTP client
     *
     * Uses the config keys `qdrant_baseurl`, `qdrant_collection` and (optionally) `qdrant_apikey`.
     *
     * @inheritdoc
     */
    public function __construct(array $config)
    {

        $this->baseurl = trim($config['qdrant_baseurl'] ?? '', '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key header is only sent when a key has been configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The `wait=true` query parameter is appended to every request. On success the `result`
     * member of the decoded response is returned, or the whole decoded response if there is
     * no such member.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed
     * @throws \Exception when the response is empty, is not valid JSON or the HTTP status is not 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        if ($data === []) {
            // an empty PHP array would be encoded as a JSON list, send an empty JSON object instead
            $json = '{}';
        } else {
            $json = json_encode($data, JSON_THROW_ON_ERROR);
        }

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\Exception $e) {
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            // prefer the error message from the response body, fall back to the HTTP client's error
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet and a dimension count was given.
     * The name is remembered after the first successful lookup or creation, so later calls
     * do not query the server again.
     *
     * @param int $createWithDimensions if > 0, the collection will be created with this many dimensions
     * @return string
     * @throws \Exception when the collection lookup fails and no dimensions were given, or creation fails
     */
    public function getCollection($createWithDimensions = 0)
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception $e) {
            // without dimensions we can not create the collection, so pass the failure on
            if (!$createWithDimensions) throw $e;
        }

        // still here? create the collection
        $data = [
            'vectors' => [
                'size' => $createWithDimensions,
                'distance' => 'Cosine',
            ]
        ];

        // create the collection
        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /**
     * Build the list of chunk IDs that are addressed for a page
     *
     * @param int $firstChunkID the first chunk ID of the page
     * @return int[] PAGE_CHUNK_RANGE consecutive IDs starting at $firstChunkID
     */
    private function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + self::PAGE_CHUNK_RANGE - 1, 1);
    }

    /**
     * Create a Chunk object from a point as returned by the Qdrant API
     *
     * The point needs to carry `id`, `vector` and a `payload` with `page`, `text` and `created`.
     * A missing `language` in the payload is treated as an empty string.
     *
     * @param array $point the decoded point
     * @param bool $withScore pass the point's `score` on to the Chunk as well
     * @return Chunk
     */
    private function pointToChunk($point, $withScore = false)
    {
        $args = [
            $point['payload']['page'],
            (int)$point['id'],
            $point['payload']['text'],
            $point['vector'],
            $point['payload']['language'] ?? '',
            (int)$point['payload']['created'],
        ];

        // the score is only appended when requested, otherwise Chunk uses its own default
        if ($withScore) {
            $args[] = $point['score'];
        }

        return new Chunk(...$args);
    }

    /**
     * Deletes the collection when a clean start is requested, does nothing otherwise
     *
     * @inheritdoc
     */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        try {
            $collection = $this->getCollection();
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            $this->collection = ''; // forget the cached name, the collection is gone
        } catch (\Exception) {
            // no such collection
        }
    }

    /**
     * Fetches a single point by its ID; any failure is reported as "no such chunk" (null)
     *
     * @inheritdoc
     */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->pointToChunk($data);
    }


    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /**
     * Deletes all points in the page's chunk ID range; does nothing if the collection is unavailable
     *
     * @inheritdoc
     */
    public function deletePageChunks($page, $firstChunkID)
    {
        try {
            $collection = $this->getCollection();
        } catch (\Exception) {
            // no such collection
            return;
        }

        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $collection . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID)
            ],
            'POST'
        );
    }

    /**
     * Stores the given chunks as points
     *
     * The collection is created on demand, sized after the embedding of the last given chunk.
     * Nothing is sent when no chunks are given.
     *
     * @inheritdoc
     */
    public function addPageChunks($chunks)
    {
        $points = [];
        $dimensions = 0;
        foreach ($chunks as $chunk) {
            $embedding = $chunk->getEmbedding();
            $dimensions = count($embedding);

            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $embedding,
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage()
                ]
            ];
        }

        // without any chunk there is nothing to store and no dimension to create a collection with
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection($dimensions) . '/points',
            [
                'points' => $points
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /**
     * Fetches all points in the page's chunk ID range including payload and vector
     *
     * @inheritdoc
     */
    public function getPageChunks($page, $firstChunkID)
    {
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point);
        }
        return $chunks;
    }

    /**
     * Searches for the points closest to the given vector, optionally restricted to a language
     *
     * @inheritdoc
     */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        if ($lang) {
            // only accept points whose payload language matches
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang
                        ],
                    ]
                ]
            ];
        } else {
            $filter = null;
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point, true);
        }
        return $chunks;
    }

    /**
     * Collects information about the collection and the server
     *
     * @inheritdoc
     */
    public function statistics()
    {

        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): vectors_count may be missing or null depending on the Qdrant version,
            // points_count is used as fallback then - confirm against the Qdrant API reference
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? 0,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }
}