<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunkIDs are created outside of the storage. They reference the Page's ID in
 * DokuWiki's fulltext index. ChunkIDs count from the page's id*100 upwards. E.g. Page 12 will have
 * chunks 1200, 1201, 1202, ...
 */
abstract class AbstractStorage
{
    /** @var CLIPlugin $logger The logger handed in via setLogger(); unset until then */
    protected $logger;

    /**
     * Set the logger this storage keeps in its $logger property
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks should be reused
     *
     * Storages that can be updated may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new Chunks to the storage
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Run maintenance tasks on the storage
     *
     * Each storage can decide on its own what to do here. Documentation should explain
     * how often this should be run.
     *
     * @return void
     */
    abstract public function runMaintenance();

    /**
     * Get all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If possible in an efficient way, only chunks readable by the current user should be returned (ACL check).
     * If not, the storage should return twice the $limit of chunks and the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param string $lang Limit results to this language. When empty consider all languages
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $lang = '', $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key value pairs. Keys should be self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();

    /**
     * Writes TSV files for visualizing with http://projector.tensorflow.org/
     *
     * This base implementation does not write anything and always throws; a storage
     * has to override this method to provide the export.
     *
     * @param string $vectorfile path to the file with the vectors
     * @param string $metafile path to the file with the metadata
     * @return void
     * @throws \RuntimeException always, in this base implementation
     */
    public function dumpTSV($vectorfile, $metafile)
    {
        throw new \RuntimeException('Not implemented for current storage');
    }
}