<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\plugin\aichat\Chunk;

/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunkIDs are created outside of the storage. They reference the Page's ID in
 * DokuWiki's fulltext index. ChunkIDs count from the page's id*100 upwards. E.g. Page 12 will have
 * chunks 1200, 1201, 1202, ...
 */
abstract class AbstractStorage
{
    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks should be reused
     *
     * Storages that can be updated may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the page's first chunk (may not exist)
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new Chunks to the storage
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If possible in an efficient way, only chunks readable by the current user should be returned (ACL check).
     * If not, the storage should return twice the $limit of chunks and the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key value pairs. Keys should be self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();
}