xref: /plugin/aichat/Storage/AbstractStorage.php (revision 7ebc78955c65af90e7ee0afbd07adc15271113ba)
1<?php
2
3namespace dokuwiki\plugin\aichat\Storage;
4
5use dokuwiki\Extension\CLIPlugin;
6use dokuwiki\plugin\aichat\Chunk;
7
/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunkIDs are created outside of the storage. They reference the Page's ID in
 * DokuWiki's fulltext index. ChunkIDs count from the page's id*100 upwards. E.g. Page 12 will have
 * chunks 1200, 1201, 1202, ...
 */
abstract class AbstractStorage
{
    /** @var CLIPlugin $logger Logger handed in via setLogger(); null until that has been called */
    protected $logger;

    /**
     * Inject the logger the storage should use
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks should be reused
     *
     * Storages that can be updated may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new Chunks to the storage
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Run maintenance tasks on the storage
     *
     * Each storage can decide on its own what to do here. Documentation should explain
     * how often this should be run.
     *
     * @return void
     */
    abstract public function runMaintenance();

    /**
     * Get all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If possible in an efficient way, only chunks readable by the current user should be returned (ACL check).
     * If not, the storage should return twice the $limit of chunks and the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param string $lang Limit results to this language. When empty, consider all languages
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $lang = '', $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key value pairs. Keys should be self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();

    /**
     * Writes TSV files for visualizing with http://projector.tensorflow.org/
     *
     * This base implementation writes nothing and always throws. The method is not
     * abstract, so subclasses may override it but are not required to.
     *
     * @param string $vectorfile path to the file with the vectors
     * @param string $metafile path to the file with the metadata
     * @return void
     * @throws \RuntimeException always, in this base implementation
     */
    public function dumpTSV($vectorfile, $metafile)
    {
        throw new \RuntimeException('Not implemented for current storage');
    }
}
139