<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Implements the storage backend using a Qdrant server accessed through its HTTP API
 */
class QdrantStorage extends AbstractStorage
{
    /**
     * Number of consecutive chunk IDs, starting at a page's first chunk ID,
     * that are addressed when fetching or deleting the chunks of a page
     */
    private const CHUNK_IDS_PER_PAGE = 100;

    /** Vector size used when the collection has to be created. FIXME should not be hardcoded */
    private const DEFAULT_VECTOR_SIZE = 1536;

    /** @var string URL to the qdrant server instance (stored without trailing slash) */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it is known to exist, empty before that */
    protected $collection = '';

    /** @var string configured collection name */
    protected $collectionName = '';

    /**
     * QdrantStorage constructor.
     *
     * Reads the connection settings from the aichat helper plugin configuration
     * and prepares a JSON speaking HTTP client.
     */
    public function __construct()
    {
        $helper = plugin_load('helper', 'aichat');

        // runQuery() joins base URL and endpoint with a single slash, so a configured
        // trailing slash would otherwise produce "//" in the request URL
        $this->baseurl = rtrim((string)$helper->getConf('qdrant_baseurl'), '/');
        $this->collectionName = $helper->getConf('qdrant_collection');

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        $apiKey = $helper->getConf('qdrant_apikey');
        if ($apiKey) {
            $this->http->headers['api-key'] = $apiKey;
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The query parameter wait=true is appended to every request.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed the 'result' member of the decoded response if there is one, the whole decoded response otherwise
     * @throws \Exception on an empty response, undecodable JSON or a HTTP status other than 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        // json_encode() turns an empty PHP array into "[]", send an empty object instead
        $json = ($data === []) ? '{}' : json_encode($data, JSON_THROW_ON_ERROR);

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            // keep the decoder's error available as the previous exception
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet
     *
     * @return string
     * @throws \Exception when the collection can not be created
     */
    public function getCollection()
    {
        if ($this->collection) return $this->collection;

        $endpoint = '/collections/' . $this->collectionName;

        try {
            $this->runQuery($endpoint, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception) {
            // Any failure of the lookup is treated as "collection does not exist".
            // If the real cause is something else, the creation request below will throw.
        }

        // create the collection
        $this->runQuery(
            $endpoint,
            [
                'vectors' => [
                    'size' => self::DEFAULT_VECTOR_SIZE,
                    'distance' => 'Cosine',
                ],
            ],
            'PUT'
        );
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // Note: getCollection() creates the collection when it is missing, so there
        // is always something to delete here. The cached name is reset afterwards so
        // that the next getCollection() call sets the collection up again.
        $collection = $this->getCollection();
        if ($collection) {
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            $this->collection = '';
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point (any request failure is reported the same way)
            return null;
        }

        return $this->chunkFromPoint($data);
    }

    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID),
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        foreach ($chunks as $chunk) {
            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $chunk->getEmbedding(),
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage(),
                ],
            ];
        }

        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'points' => $points,
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        // ask for all possible chunk IDs of the page
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        // restrict the search to chunks of the given language, if one was given
        $filter = null;
        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang,
                        ],
                    ],
                ],
            ];
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point, true);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): 'vectors_count' may be missing or null depending on the
            // Qdrant version (to be confirmed against the server in use); fall back to
            // 'points_count' instead of triggering an undefined index warning
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? null,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }

    /**
     * List all chunk IDs that may belong to the page starting at the given chunk ID
     *
     * @param int $firstChunkID first chunk ID of the page
     * @return int[] self::CHUNK_IDS_PER_PAGE consecutive IDs beginning with $firstChunkID
     */
    private function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + self::CHUNK_IDS_PER_PAGE - 1, 1);
    }

    /**
     * Build a Chunk from a point record as returned by the Qdrant API
     *
     * @param array $point decoded point with 'id', 'vector' and 'payload' members
     * @param bool $withScore pass the point's 'score' member on to the Chunk (search results)
     * @return Chunk
     */
    private function chunkFromPoint(array $point, bool $withScore = false)
    {
        $payload = $point['payload'];

        $args = [
            $payload['page'],
            (int)$point['id'],
            $payload['text'],
            $point['vector'],
            $payload['language'] ?? '',
            (int)$payload['created'],
        ];
        if ($withScore) {
            $args[] = $point['score'];
        }

        return new Chunk(...$args);
    }
}