<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Implements the storage backend using a Qdrant vector database server
 *
 * All communication happens via JSON over HTTP, see runQuery()
 */
class QdrantStorage extends AbstractStorage
{
    /** @var string URL to the qdrant server instance (without trailing slash) */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it has been verified or created, empty before that */
    protected $collection = '';

    /** @var string the configured collection name */
    protected $collectionName = '';


    /** @inheritdoc */
    public function __construct(array $config)
    {

        $this->baseurl = trim($config['qdrant_baseurl'] ?? '', '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key is optional and only sent when configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * Empty or non-JSON responses are retried up to three times, sleeping 1, 2 and 3 seconds
     * between the attempts. Any HTTP status other than 200 is reported as an exception.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @param int $retry Number of retries already made. Used internally, callers should not set it
     * @return mixed The 'result' member of the decoded response, or the whole decoded response if
     *               there is no such member
     * @throws \Exception
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST', $retry = 0)
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        if ($data === []) {
            // an empty PHP array would be encoded as a JSON list, send an empty object instead
            $json = '{}';
        } else {
            $json = json_encode($data, JSON_THROW_ON_ERROR);
        }

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            if ($retry < 3) {
                sleep(1 + $retry);
                return $this->runQuery($endpoint, $data, $method, $retry + 1);
            }

            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            // the retry is issued from the catch block, so its own exceptions propagate to the caller
            if ($retry < 3) {
                sleep(1 + $retry);
                return $this->runQuery($endpoint, $data, $method, $retry + 1);
            }

            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            // prefer the error message from the response body over the client's own error
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet and a dimension count was given.
     * The name is remembered, so the server is only asked once per instance.
     *
     * @param int $createWithDimensions if > 0, the collection will be created with this many dimensions
     * @return string
     * @throws \Exception when the collection does not exist and should not or could not be created
     */
    public function getCollection($createWithDimensions = 0)
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception $e) {
            // any failure of the lookup is treated as "collection is missing"
            if (!$createWithDimensions) throw $e;
        }

        // still here? create the collection
        $data = [
            'vectors' => [
                'size' => $createWithDimensions,
                'distance' => 'Cosine',
            ],
        ];

        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        try {
            $collection = $this->getCollection();
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            // forget the name, so the next getCollection() call checks the server again
            $this->collection = '';
        } catch (\Exception) {
            // no such collection
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return new Chunk(
            $data['payload']['page'],
            (int)$data['id'],
            $data['payload']['text'],
            $data['vector'],
            $data['payload']['language'] ?? '',
            (int)$data['payload']['created']
        );
    }


    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        try {
            $collection = $this->getCollection();
        } catch (\Exception) {
            // no such collection
            return;
        }

        // delete all possible chunk IDs: the 100 consecutive IDs starting at the first one
        $ids = range($firstChunkID, $firstChunkID + 99, 1);

        $this->runQuery(
            '/collections/' . $collection . '/points/delete',
            [
                'points' => $ids,
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        $dimensions = 0;
        foreach ($chunks as $chunk) {
            $embedding = $chunk->getEmbedding();
            // the last chunk's embedding size is used when the collection has to be created
            $dimensions = count($embedding);

            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $embedding,
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage(),
                ],
            ];
        }

        // nothing to store: without any chunk there is no embedding to take the dimensions from
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection($dimensions) . '/points',
            [
                'points' => $points,
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        // ask for all possible chunk IDs: the 100 consecutive IDs starting at the first one
        $ids = range($firstChunkID, $firstChunkID + 99, 1);

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $ids,
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = new Chunk(
                $point['payload']['page'],
                (int)$point['id'],
                $point['payload']['text'],
                $point['vector'],
                $point['payload']['language'] ?? '',
                (int)$point['payload']['created']
            );
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        if ($lang) {
            // only consider chunks whose language payload matches the requested language
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang,
                        ],
                    ],
                ],
            ];
        } else {
            $filter = null;
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = new Chunk(
                $point['payload']['page'],
                (int)$point['id'],
                $point['payload']['text'],
                $point['vector'],
                $point['payload']['language'] ?? '',
                (int)$point['payload']['created'],
                $point['score']
            );
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {

        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): newer Qdrant releases may no longer report vectors_count, fall back
            // to points_count in that case - confirm against the deployed server version
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? 0,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }
}