<?php

namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\Extension\CLIPlugin;
use dokuwiki\plugin\aichat\Chunk;

/**
 * Base class for vector storages that hold page chunks together with their embeddings
 *
 * Please note that chunk IDs are not assigned by the storage itself. They are derived from the
 * page's ID in DokuWiki's fulltext index: the chunks of a page are numbered from the page's
 * id*100 upwards. E.g. page 12 will have the chunks 1200, 1201, 1202, ...
 */
abstract class AbstractStorage
{
    /** @var CLIPlugin $logger The logger injected via setLogger() */
    protected $logger;

    /**
     * Set up the storage
     *
     * @param array $config The plugin's configuration
     */
    abstract public function __construct(array $config);

    /**
     * Inject the logger this storage should use
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Look up a single chunk by its ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Hook that is called right before the storage is (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called during a (re)build when the existing chunks of a page should be reused
     *
     * Storages that can be updated in place may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Remove all chunks that are associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Store the given new chunks
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * Finalize the storage once all chunks have been added
     *
     * This is where transactions may be committed and/or memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Run maintenance tasks on the storage
     *
     * Each storage can decide on its own what to do here. Its documentation should explain
     * how often this should be run.
     *
     * @return void
     */
    abstract public function runMaintenance();

    /**
     * Fetch all chunks that are associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Find the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If it can be done efficiently, only chunks readable by the current user should be
     * returned (ACL check). If not, the storage should return twice the $limit of chunks and
     * the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param string $lang Limit results to this language. When empty, consider all languages
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $lang = '', $limit = 4);

    /**
     * Report information about the storage
     *
     * Each storage can decide on its own what to return here as key value pairs. Keys should
     * be self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();

    /**
     * Writes TSV files for visualizing with http://projector.tensorflow.org/
     *
     * This default implementation always throws; presumably storages that support the export
     * override it.
     *
     * @param string $vectorfile path to the file with the vectors
     * @param string $metafile path to the file with the metadata
     * @return void
     * @throws \RuntimeException when the current storage does not implement the export
     */
    public function dumpTSV($vectorfile, $metafile)
    {
        $message = 'Not implemented for current storage';
        throw new \RuntimeException($message);
    }
}