xref: /plugin/aichat/Storage/AbstractStorage.php (revision 01f06932bbd74c60ea6c93ab68b0d6cf32d05aea)
1f6ef2e50SAndreas Gohr<?php
2f6ef2e50SAndreas Gohr
3f6ef2e50SAndreas Gohrnamespace dokuwiki\plugin\aichat\Storage;
4f6ef2e50SAndreas Gohr
5f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
6f6ef2e50SAndreas Gohr
/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunkIDs are created outside of the storage. They reference the page's ID in
 * DokuWiki's fulltext index. ChunkIDs count from the page's id*100 upwards. E.g. page 12 will have
 * chunks 1200, 1201, 1202, ...
 *
 * The methods are intentionally declared without native parameter or return types so that
 * existing storage implementations with untyped signatures remain compatible.
 */
abstract class AbstractStorage
{
    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks should be reused
     *
     * Storages that can be updated may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new chunks to the storage
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Get all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk
     * @return Chunk[]
     */
    abstract public function getPageChunks($page, $firstChunkID);

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If possible in an efficient way, only chunks readable by the current user should be returned (ACL check).
     * If not, the storage should return twice the $limit of chunks and the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key value pairs. Keys should be self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();
}
104