<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Implements the storage backend using a Qdrant server instance
 *
 * All communication happens as JSON over HTTP through runQuery().
 */
class QdrantStorage extends AbstractStorage
{
    /** @var string URL to the qdrant server instance (stored without trailing slash) */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once getCollection() has confirmed or created it, empty before */
    protected $collection = '';

    /** @var string the configured collection name */
    protected $collectionName = '';


    /**
     * Set up the HTTP client from the plugin configuration
     *
     * Reads the keys qdrant_baseurl, qdrant_collection and (optionally) qdrant_apikey.
     *
     * @inheritdoc
     */
    public function __construct(array $config)
    {
        // runQuery() always inserts its own '/', so strip trailing slashes to avoid '//' in request URLs
        $this->baseurl = rtrim((string)($config['qdrant_baseurl'] ?? ''), '/');
        $this->collectionName = $config['qdrant_collection'] ?? '';

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key header is only sent when a key has been configured
        if (!empty($config['qdrant_apikey'])) {
            $this->http->headers['api-key'] = $config['qdrant_apikey'];
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed the 'result' member of the decoded response if present, the whole decoded response otherwise
     * @throws \Exception on an empty response, invalid JSON or any HTTP status other than 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        if ($data === []) {
            // an empty PHP array would be encoded as '[]', send an empty JSON object instead
            $json = '{}';
        } else {
            $json = json_encode($data, JSON_THROW_ON_ERROR);
        }

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\Exception $e) {
            // keep the decoding error as previous exception so the root cause is not lost
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet. The name is cached in
     * $this->collection after the first successful lookup or creation.
     *
     * @return string
     * @throws \Exception when the collection can not be created
     */
    public function getCollection()
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception) {
            // any failure of the lookup is treated as "collection does not exist";
            // a real connection problem will surface in the create request below
        }

        $data = [
            'vectors' => [
                'size' => 1536, // FIXME should not be hardcoded
                'distance' => 'Cosine',
            ],
        ];

        // create the collection
        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /**
     * Optionally drop the collection before (re)creating the index
     *
     * Nothing happens unless $clear is true.
     *
     * @inheritdoc
     */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        $collection = $this->getCollection();
        if ($collection) {
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            // forget the cached name so the next getCollection() call creates the collection again
            $this->collection = '';
        }
    }

    /**
     * Fetch a single point by its ID and convert it to a Chunk
     *
     * Returns null when the request fails for any reason.
     *
     * @inheritdoc
     */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return new Chunk(
            $data['payload']['page'],
            (int)$data['id'],
            $data['payload']['text'],
            $data['vector'],
            $data['payload']['language'] ?? '',
            (int)$data['payload']['created']
        );
    }


    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /**
     * Delete the points of a page
     *
     * The page name itself is not used; the 100 consecutive IDs starting at
     * $firstChunkID are sent to the delete endpoint.
     *
     * @inheritdoc
     */
    public function deletePageChunks($page, $firstChunkID)
    {
        // delete all possible chunk IDs
        $ids = range($firstChunkID, $firstChunkID + 99, 1);

        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/delete',
            [
                'points' => $ids,
            ],
            'POST'
        );
    }

    /**
     * Store the given chunks as points (ID, vector and payload) in the collection
     *
     * @inheritdoc
     */
    public function addPageChunks($chunks)
    {
        $points = [];
        foreach ($chunks as $chunk) {
            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $chunk->getEmbedding(),
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage(),
                ],
            ];
        }

        // nothing to store, do not send a request with an empty point list
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'points' => $points,
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /**
     * Load the chunks of a page
     *
     * The page name itself is not used; the 100 consecutive IDs starting at
     * $firstChunkID are requested and every returned point becomes a Chunk.
     *
     * @inheritdoc
     */
    public function getPageChunks($page, $firstChunkID)
    {
        $ids = range($firstChunkID, $firstChunkID + 99, 1);

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $ids,
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = new Chunk(
                $point['payload']['page'],
                (int)$point['id'],
                $point['payload']['text'],
                $point['vector'],
                $point['payload']['language'] ?? '',
                (int)$point['payload']['created']
            );
        }
        return $chunks;
    }

    /**
     * Search for the points most similar to the given vector
     *
     * When $lang is given, only points whose 'language' payload matches are searched.
     * Twice the requested $limit is queried.
     *
     * @inheritdoc
     */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang,
                        ],
                    ],
                ],
            ];
        } else {
            $filter = null;
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = new Chunk(
                $point['payload']['page'],
                (int)$point['id'],
                $point['payload']['text'],
                $point['vector'],
                $point['payload']['language'] ?? '',
                (int)$point['payload']['created'],
                $point['score']
            );
        }
        return $chunks;
    }

    /**
     * Collect information about the collection and the server
     *
     * Combines the collection info with the server's telemetry endpoint.
     *
     * @inheritdoc
     */
    public function statistics()
    {

        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            'chunks' => $info['vectors_count'],
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }
}