xref: /plugin/aichat/Storage/AbstractStorage.php (revision 3379af09b7ec10f96a8d4f23b1563bd7f9ae79ac)
1<?php
2
3namespace dokuwiki\plugin\aichat\Storage;
4
5use dokuwiki\Extension\CLIPlugin;
6use dokuwiki\plugin\aichat\Chunk;
7
8/**
9 * Defines a vector storage for page chunks and their embeddings
10 *
11 * Please note that chunkIDs are created outside of the storage. They reference the Page's ID in
12 * DokuWiki's fulltext index. ChunkIDs count from the page's id*100 upwards. Eg. Page 12 will have
13 * chunks 1200, 1201, 1202, ...
14 */
abstract class AbstractStorage
{
    /**
     * The logger handed in via setLogger(); unset until that method has been called
     *
     * @var CLIPlugin
     */
    protected $logger;

    /**
     * Remember the given logger on this storage instance
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Look up a single chunk by its ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Hook that runs right before the storage gets (re)built
     *
     * This is the place for a storage to open a transaction or do any other preparation.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Hook that runs during a (re)build when a page's existing chunks are to be kept
     *
     * A storage that supports in-place updates may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Remove every chunk that belongs to the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Store the given new chunks
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * Finalize the storage once all chunks have been added
     *
     * Transactions may be committed and/or in-memory structures written to disk here.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Perform maintenance tasks on the storage
     *
     * What happens here is up to each storage. Its documentation should explain
     * how often this needs to be run.
     *
     * @return void
     */
    abstract public function runMaintenance();

    /**
     * Fetch all chunks that belong to the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Run a nearest neighbor search and return the chunks most similar to the given vector
     *
     * Results should be ordered by similarity, with the most similar chunk first.
     *
     * Where it can be done efficiently, only chunks the current user may read should be returned (ACL check).
     * Otherwise the storage should return twice the $limit of chunks and the caller filters out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $limit = 4);

    /**
     * Report information about the storage
     *
     * Each storage decides on its own what to return here as key value pairs. Keys should be self explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();
}
125