xref: /plugin/aichat/Storage/AbstractStorage.php (revision 3379af09b7ec10f96a8d4f23b1563bd7f9ae79ac)
1f6ef2e50SAndreas Gohr<?php
2f6ef2e50SAndreas Gohr
3f6ef2e50SAndreas Gohrnamespace dokuwiki\plugin\aichat\Storage;
4f6ef2e50SAndreas Gohr
5*3379af09SAndreas Gohruse dokuwiki\Extension\CLIPlugin;
6f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
7f6ef2e50SAndreas Gohr
/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunkIDs are created outside of the storage. They reference the page's ID in
 * DokuWiki's fulltext index. ChunkIDs count from the page's id*100 upwards. E.g. page 12 will have
 * chunks 1200, 1201, 1202, ...
 */
abstract class AbstractStorage
{
    /** @var CLIPlugin|null The logger set via setLogger(); null until one has been set */
    protected $logger;

    /**
     * Set the logger this storage keeps in its $logger property
     *
     * @param CLIPlugin $logger
     * @return void
     */
    public function setLogger($logger)
    {
        $this->logger = $logger;
    }

    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks should be reused
     *
     * Storages that can be updated may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new chunks to the storage
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Run maintenance tasks on the storage
     *
     * Each storage can decide on its own what to do here. Documentation should explain
     * how often this should be run.
     *
     * @return void
     */
    abstract public function runMaintenance();

    /**
     * Get all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If possible in an efficient way, only chunks readable by the current user should be returned (ACL check).
     * If not, the storage should return twice the $limit of chunks and the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key value pairs. Keys should be self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();
}
125