<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\plugin\aichat\Chunk;

/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunk IDs are created outside of the storage. They reference the page's ID in
 * DokuWiki's fulltext index. Chunk IDs count from the page's id*100 upwards. E.g. page 12 will have
 * chunks 1200, 1201, 1202, ...
 *
 * Several methods below receive a $firstChunkID. Following the numbering scheme above, this is the
 * lowest chunk ID that belongs to the given page (page id*100).
 */
abstract class AbstractStorage
{
    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID The ID of the chunk to fetch
     * @return Chunk|null The chunk, or null (presumably when no chunk is stored under that ID)
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks should be reused
     *
     * Storages that can be updated in place may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new chunks to the storage
     *
     * @param Chunk[] $chunks The chunks to store
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Get all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If possible in an efficient way, only chunks readable by the current user should be returned (ACL check).
     * If not, the storage should return twice the $limit of chunks and the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key value pairs. Keys should be self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();
}