xref: /plugin/aichat/Storage/AbstractStorage.php (revision 01f06932bbd74c60ea6c93ab68b0d6cf32d05aea)
1<?php
2
3namespace dokuwiki\plugin\aichat\Storage;
4
5use dokuwiki\plugin\aichat\Chunk;
6
/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunk IDs are created outside of the storage. They reference the page's ID in
 * DokuWiki's fulltext index. Chunk IDs count from the page's id*100 upwards. E.g. page 12 will have
 * chunks 1200, 1201, 1202, ...
 *
 * This class only declares the contract: every method is abstract and has to be provided by the
 * concrete storage implementation.
 */
abstract class AbstractStorage
{

    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID The ID of the chunk to look up
     * @return Chunk|null The chunk, or null (per the declared return type, presumably when no
     *                    chunk with this ID is stored - confirm against the implementations)
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks of a page should be reused
     *
     * Storages that can be updated in place may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk of that page
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new chunks to the storage
     *
     * @param Chunk[] $chunks The chunks to store
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or in-memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Get all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk of that page
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If it can be done efficiently, only chunks readable by the current user should be returned
     * (ACL check). If not, the storage should return twice the $limit of chunks and the caller will
     * filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key/value pairs. Keys should be
     * self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();
}
104