<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunkIDs are created outside of the storage. They reference the Page's ID in
 * DokuWiki's fulltext index. ChunkIDs count from the page's id*100 upwards. Eg. Page 12 will have
 * chunks 1200, 1201, 1202, ...
 */
abstract class AbstractStorage
{
    /** @var CLIPlugin $logger The logger passed in via setLogger(); unset until then */
    protected $logger;

    /**
     * Every storage is constructed from the plugin's configuration
     *
     * @param array $config The plugin's configuration
     */
    abstract public function __construct(array $config);

    /**
     * Set the logger this storage keeps in its $logger property
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks should be reused
     *
     * Storages that can be updated may simply do nothing here
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new Chunks to the storage
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Run maintenance tasks on the storage
     *
     * Each storage can decide on its own what to do here. Documentation should explain
     * how often this should be run.
     *
     * @return void
     */
    abstract public function runMaintenance();

    /**
     * Get all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If possible in an efficient way, only chunks readable by the current user should be returned (ACL check).
     * If not, the storage should return twice the $limit of chunks and the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param string $lang Limit results to this language. When empty consider all languages
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $lang = '', $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key value pairs. Keys should be self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();

    /**
     * Writes TSV files for visualizing with http://projector.tensorflow.org/
     *
     * This base implementation always throws; a storage has to override this method
     * to provide the dump.
     *
     * @param string $vectorfile path to the file with the vectors
     * @param string $metafile path to the file with the metadata
     * @return void
     * @throws \RuntimeException always, in this base implementation
     */
    public function dumpTSV($vectorfile, $metafile)
    {
        throw new \RuntimeException('Not implemented for current storage');
    }
}