<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Implements the storage backend using a Qdrant vector database server
 *
 * All communication happens via the Qdrant HTTP API, see runQuery().
 */
class QdrantStorage extends AbstractStorage
{
    /** @var string URL to the qdrant server instance (without trailing slash) */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once getCollection() has checked or created it, empty before */
    protected $collection = '';

    /** @var string the collection name as configured in 'qdrant_collection' */
    protected $collectionName = '';

    /**
     * QdrantStorage constructor.
     *
     * Reads the connection settings from the aichat helper plugin configuration
     * and prepares the HTTP client used for all API requests.
     */
    public function __construct()
    {
        $helper = plugin_load('helper', 'aichat');

        // runQuery() joins base URL and endpoint with a single slash, so drop any trailing ones here
        $this->baseurl = rtrim((string)$helper->getConf('qdrant_baseurl'), '/');
        $this->collectionName = $helper->getConf('qdrant_collection');

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        // the API key header is only sent when a key has been configured
        $apiKey = $helper->getConf('qdrant_apikey');
        if ($apiKey) {
            $this->http->headers['api-key'] = $apiKey;
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The request URL always gets '?wait=true' appended. Of the decoded response,
     * the 'result' key is returned if present, otherwise the whole decoded response.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed
     * @throws \Exception when the response is empty, is not valid JSON or the HTTP status is not 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        // json_encode() would turn an empty PHP array into a JSON list ('[]'), send an empty object instead
        if ($data === []) {
            $json = '{}';
        } else {
            $json = json_encode($data, JSON_THROW_ON_ERROR);
        }

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\Exception $e) {
            // keep the decoding error attached for debugging
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet
     *
     * @return string
     * @throws \Exception when the collection can not be created
     */
    public function getCollection()
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception) {
            // collection seems not to exist
            // any failure ends up here; the creation request below will throw again on real problems
        }

        $data = [
            'vectors' => [
                'size' => 1536, // FIXME should not be hardcoded
                'distance' => 'Cosine',
            ]
        ];

        // create the collection
        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        $collection = $this->getCollection();
        if ($collection) {
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            // reset, so the next getCollection() call creates the collection again
            $this->collection = '';
        }
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->chunkFromPoint($data);
    }

    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID)
            ],
            'POST'
        );
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        $points = [];
        foreach ($chunks as $chunk) {
            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $chunk->getEmbedding(),
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage()
                ]
            ];
        }

        // nothing to store, so don't send an upsert request without any points
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'points' => $points
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /** @inheritdoc */
    public function getPageChunks($page, $firstChunkID)
    {
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        // restrict the search to chunks of the given language, if one was passed
        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang
                        ],
                    ]
                ]
            ];
        } else {
            $filter = null;
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->chunkFromPoint($point, true);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'],
            'vector_config' => $info['config']['params']['vectors'],
            // NOTE(review): newer Qdrant releases appear to no longer report 'vectors_count',
            // fall back to 'points_count' in that case - confirm against the server version in use
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? 0,
            'segments' => $info['segments_count'],
            'status' => $info['status'],
        ];
    }

    /**
     * Build the list of point IDs that are requested or deleted for a single page
     *
     * Covers 100 consecutive IDs beginning with the given one.
     * NOTE(review): presumably each page owns a block of 100 chunk IDs - confirm against the caller
     *
     * @param int $firstChunkID the first ID of the range
     * @return int[]
     */
    private function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + 99, 1);
    }

    /**
     * Create a Chunk object from a point as returned by the API
     *
     * @param array $point decoded point with the keys 'id', 'vector' and 'payload' (and 'score' for searches)
     * @param bool $withScore pass the point's 'score' on to the Chunk
     * @return Chunk
     */
    private function chunkFromPoint($point, $withScore = false)
    {
        $payload = $point['payload'];

        if ($withScore) {
            return new Chunk(
                $payload['page'],
                (int)$point['id'],
                $payload['text'],
                $point['vector'],
                $payload['language'] ?? '',
                (int)$payload['created'],
                $point['score']
            );
        }

        return new Chunk(
            $payload['page'],
            (int)$point['id'],
            $payload['text'],
            $point['vector'],
            $payload['language'] ?? '',
            (int)$payload['created']
        );
    }
}