<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Base contract for a vector storage holding page chunks together with their embeddings
 *
 * Note that chunk IDs are not generated by the storage itself. They are derived from the
 * page's ID in DokuWiki's fulltext index and start at that ID multiplied by 100. For
 * example, page 12 owns the chunks 1200, 1201, 1202, ...
 */
abstract class AbstractStorage
{
    /** @var CLIPlugin $logger The logger handed in via setLogger() */
    protected $logger;

    /**
     * Register the logger this storage should use
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Fetch a single chunk by its ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Hook invoked right before the storage is (re)built
     *
     * Implementations may open a transaction or do other preparation work here.
     *
     * @param bool $clear Should any existing data be discarded?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Hook invoked during a (re)build when a page's existing chunks are to be kept
     *
     * Storages that support in-place updates may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Remove every chunk that belongs to the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Store the given new chunks
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * Finish the (re)build once all chunks have been added
     *
     * Transactions may be committed and/or in-memory structures written to disk here.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Perform maintenance work on the storage
     *
     * What happens here is up to each storage. Its documentation should explain
     * how often this needs to be run.
     *
     * @return void
     */
    abstract public function runMaintenance();

    /**
     * Fetch all chunks that belong to the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Run a nearest neighbor search for the chunks most similar to the given vector
     *
     * Results should be ordered by similarity, with the most similar chunk first.
     *
     * Where it can be done efficiently, only chunks the current user may read should be
     * returned (ACL check). Otherwise the storage should return twice the $limit of chunks
     * and the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $limit = 4);

    /**
     * Report information about the storage
     *
     * Each storage decides on its own which key value pairs to return here. Keys should
     * be self explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();
}