xref: /plugin/aichat/Storage/AbstractStorage.php (revision f6ef2e505783ac17f756e44bf15c66238362377a)
1*f6ef2e50SAndreas Gohr<?php
2*f6ef2e50SAndreas Gohr
3*f6ef2e50SAndreas Gohrnamespace dokuwiki\plugin\aichat\Storage;
4*f6ef2e50SAndreas Gohr
5*f6ef2e50SAndreas Gohruse dokuwiki\plugin\aichat\Chunk;
6*f6ef2e50SAndreas Gohr
/**
 * Defines a vector storage for page chunks and their embeddings
 *
 * Please note that chunk IDs are created outside of the storage. They reference the page's ID in
 * DokuWiki's fulltext index. Chunk IDs count from the page's id*100 upwards. E.g. page 12 will have
 * chunks 1200, 1201, 1202, ...
 */
abstract class AbstractStorage
{
    /**
     * Get the chunk with the given ID
     *
     * @param int $chunkID
     * @return Chunk|null
     */
    abstract public function getChunk($chunkID);

    /**
     * Called when the storage is about to be (re)built
     *
     * Storages may need to open a transaction or prepare other things here.
     *
     * @param bool $clear Should any existing data be thrown away?
     * @return void
     */
    abstract public function startCreation($clear = false);

    /**
     * Called when the storage is (re)built and the existing chunks should be reused
     *
     * Storages that can be updated may simply do nothing here.
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the first chunk to reuse
     * @return void
     */
    abstract public function reusePageChunks($page, $firstChunkID);

    /**
     * Delete all chunks associated with the given page
     *
     * @param string $page The page the chunks belong to
     * @param int $firstChunkID The ID of the page's first chunk (may not exist)
     * @return void
     */
    abstract public function deletePageChunks($page, $firstChunkID);

    /**
     * Add the given new chunks to the storage
     *
     * @param Chunk[] $chunks
     * @return void
     */
    abstract public function addPageChunks($chunks);

    /**
     * All chunks have been added, finalize the storage
     *
     * This is where transactions may be committed and/or in-memory structures be written to disk.
     *
     * @return void
     */
    abstract public function finalizeCreation();

    /**
     * Get the chunks most similar to the given vector, using a nearest neighbor search
     *
     * The returned chunks should be sorted by similarity, most similar first.
     *
     * If possible in an efficient way, only chunks readable by the current user should be
     * returned (ACL check). If not, the storage should return twice the $limit of chunks and
     * the caller will filter out the readable ones.
     *
     * @param float[] $vector The vector to compare to
     * @param int $limit The number of results to return, see note above
     * @return Chunk[]
     */
    abstract public function getSimilarChunks($vector, $limit = 4);

    /**
     * Get information about the storage
     *
     * Each storage can decide on its own what to return here as key value pairs.
     * Keys should be self-explanatory.
     *
     * @return string[]
     */
    abstract public function statistics();
}
95