1<?php
2
3namespace dokuwiki\plugin\aichat\Storage;
4
5use dokuwiki\Extension\CLIPlugin;
6use dokuwiki\plugin\aichat\Chunk;
7
8/**
9 * Defines a vector storage for page chunks and their embeddings
10 *
 * Please note that chunkIDs are created outside of the storage. They reference the page's ID in
 * DokuWiki's fulltext index. ChunkIDs count from the page's ID*100 upwards. E.g. page 12 will have
 * chunks 1200, 1201, 1202, ...
14 */
abstract class AbstractStorage
{
    /** @var CLIPlugin $logger Logger instance injected through setLogger() */
    protected $logger;

    /**
     * Create the storage from the plugin configuration
     *
     * @param array $config The plugin's configuration
     */
    abstract public function __construct(array $config);

    /**
     * Inject the logger this storage keeps in $this->logger
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Look up a single chunk by its ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Hook invoked right before the storage gets (re)built
     *
     * Implementations may open a transaction or do any other preparation here.
     *
     * @param bool $clear Should any existing data be discarded?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Hook invoked during a (re)build when a page's existing chunks should be kept
     *
     * Storages that support in-place updates may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Remove every chunk that belongs to the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Store the given new chunks
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * Hook invoked once all chunks have been added
     *
     * This is the place to commit transactions and/or write in-memory structures to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Perform maintenance tasks on the storage
     *
     * What happens here is up to each storage. Its documentation should explain
     * how often this needs to be run.
     *
     * @return void
     */
    abstract public function runMaintenance();

    /**
     * Fetch every chunk that belongs to the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Find the chunks closest to the given vector via a nearest neighbor search
     *
     * Results are expected to be ordered by similarity, with the most similar chunk first.
     *
     * Where it can be done efficiently, only chunks the current user may read should be
     * returned (ACL check). Otherwise the storage should return twice the $limit of chunks
     * and the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param string $lang Limit results to this language. When empty consider all languages
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $lang = '', $limit = 4);

    /**
     * Report information about the storage
     *
     * Each storage decides on its own which key value pairs to return. Keys should be
     * self explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();

    /**
     * Write TSV files for visualizing with http://projector.tensorflow.org/
     *
     * This base implementation does not write anything, it always throws.
     *
     * @param string $vectorfile path to the file with the vectors
     * @param string $metafile path to the file with the metadata
     * @return void
     * @throws \RuntimeException always, in this base implementation
     */
    public function dumpTSV($vectorfile, $metafile)
    {
        $message = 'Not implemented for current storage';
        throw new \RuntimeException($message);
    }
}
144