<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\plugin\aichat\Chunk;

/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunk IDs are created outside of the storage. They reference the page's ID in
 * DokuWiki's fulltext index. Chunk IDs count upwards from the page's ID multiplied by 100, e.g.
 * page 12 will have the chunks 1200, 1201, 1202, ...
 */
abstract class AbstractStorage
{
    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks of a page should be reused
     *
     * Storages that can be updated in place may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new chunks to the storage
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or in-memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Get all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If it is possible in an efficient way, only chunks readable by the current user should be
     * returned (ACL check). If not, the storage should return twice the $limit of chunks and the
     * caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key-value pairs. Keys should be
     * self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();
}