xref: /plugin/aichat/Storage/AbstractStorage.php (revision f6ef2e505783ac17f756e44bf15c66238362377a)
1<?php
2
3namespace dokuwiki\plugin\aichat\Storage;
4
5use dokuwiki\plugin\aichat\Chunk;
6
7/**
8 * Defines a vector storage for page chunks and their embeddings
9 *
10 * Please note that chunkIDs are created outside of the storage. They reference the page's ID in
11 * DokuWiki's fulltext index. ChunkIDs count from the page's id*100 upwards. E.g. page 12 will have
12 * chunks 1200, 1201, 1202, ...
13 */
14abstract class AbstractStorage
15{
16
17    /**
18     * Get the chunk with the given ID
19     *
20     * @param int $chunkID
21     * @return Chunk|null
22     */
23    abstract public function getChunk($chunkID);
24
25    /**
26     * Called when the storage is about to be (re)built
27     *
28     * Storages may need to open a transaction or prepare other things here.
29     *
30     * @param bool $clear Should any existing data be thrown away?
31     * @return void
32     */
33    abstract public function startCreation($clear = false);
34
35    /**
36     * Called when the storage is (re)built and the existing chunks should be reused
37     *
38     * Storages that can be updated in place may simply do nothing here.
39     *
40     * @param string $page The page the chunks belong to
41     * @param int $firstChunkID The ID of the first chunk to reuse
42     * @return void
43     */
44    abstract public function reusePageChunks($page, $firstChunkID);
45
46    /**
47     * Delete all chunks associated with the given page
48     *
49     * @param string $page The page the chunks belong to
50     * @param int $firstChunkID The ID of the first chunk to delete (may not exist)
51     * @return void
52     */
53    abstract public function deletePageChunks($page, $firstChunkID);
54
55    /**
56     * Add the given new Chunks to the storage
57     *
58     * @param Chunk[] $chunks
59     * @return void
60     */
61    abstract public function addPageChunks($chunks);
62
63    /**
64     * All chunks have been added, finalize the storage
65     *
66     * This is where transactions may be committed and/or in-memory structures written to disk.
67     *
68     * @return void
69     */
70    abstract public function finalizeCreation();
71
72    /**
73     * Get the chunks most similar to the given vector, using a nearest neighbor search
74     *
75     * The returned chunks should be sorted by similarity, most similar first.
76     *
77     * If possible in an efficient way, only chunks readable by the current user should be returned (ACL check).
78     * If not, the storage should return twice the $limit of chunks and the caller will keep only the readable ones.
79     *
80     * @param float[] $vector The vector to compare to
81     * @param int $limit The number of results to return, see note above
82     * @return Chunk[]
83     */
84    abstract public function getSimilarChunks($vector, $limit = 4);
85
86    /**
87     * Get information about the storage
88     *
89     * Each storage can decide on its own what to return here as key value pairs. Keys should be self-explanatory.
90     *
91     * @return string[]
92     */
93    abstract public function statistics();
94}
95