<?php


namespace dokuwiki\plugin\aichat\Storage;

use dokuwiki\plugin\aichat\Chunk;
use dokuwiki\plugin\sqlite\SQLiteDB;

/**
 * Implements the storage backend using a SQLite database
 *
 * Chunks are kept in the `embeddings` table with the columns page, id, chunk,
 * embedding (JSON encoded vector) and created. Similarity search is done inside
 * SQLite through the custom COSIM() function registered in the constructor.
 */
class SQLiteStorage extends AbstractStorage
{
    /** @var SQLiteDB */
    protected $db;

    /**
     * Initializes the database connection and registers our custom function
     *
     * COSIM(a, b) is registered with exactly two arguments and is backed by
     * sqliteCosineSimilarityCallback().
     *
     * @throws \Exception
     */
    public function __construct()
    {
        $this->db = new SQLiteDB('aichat', DOKU_PLUGIN . 'aichat/db/');
        $this->db->getPdo()->sqliteCreateFunction('COSIM', [$this, 'sqliteCosineSimilarityCallback'], 2);
    }

    /** @inheritdoc */
    public function getChunk($chunkID)
    {
        $record = $this->db->queryRecord('SELECT * FROM embeddings WHERE id = ?', [$chunkID]);
        if (!$record) return null;

        return $this->recordToChunk($record);
    }

    /** @inheritdoc */
    public function startCreation($clear = false)
    {
        if ($clear) {
            /** @noinspection SqlWithoutWhere */
            $this->db->exec('DELETE FROM embeddings');
        }
    }

    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /** @inheritdoc */
    public function deletePageChunks($page, $firstChunkID)
    {
        // rows are matched by page name only, $firstChunkID is not needed here
        $this->db->exec('DELETE FROM embeddings WHERE page = ?', [$page]);
    }

    /** @inheritdoc */
    public function addPageChunks($chunks)
    {
        foreach ($chunks as $chunk) {
            $this->db->saveRecord('embeddings', [
                'page' => $chunk->getPage(),
                'id' => $chunk->getId(),
                'chunk' => $chunk->getText(),
                'embedding' => json_encode($chunk->getEmbedding()),
                'created' => $chunk->getCreated()
            ]);
        }
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        $this->db->exec('VACUUM');
    }

    /** @inheritdoc */
    public function getSimilarChunks($vector, $limit = 4)
    {
        // NOTE(review): GETACCESSLEVEL() is not registered in this class; presumably it is
        // provided by the sqlite plugin's SQLiteDB - confirm
        $result = $this->db->queryAll(
            'SELECT *, COSIM(?, embedding) AS similarity
               FROM embeddings
              WHERE GETACCESSLEVEL(page) > 0
           ORDER BY similarity DESC
              LIMIT ?',
            [json_encode($vector), $limit]
        );

        $chunks = [];
        foreach ($result as $record) {
            $chunks[] = $this->recordToChunk($record);
        }
        return $chunks;
    }

    /** @inheritdoc */
    public function statistics()
    {
        $items = $this->db->queryValue('SELECT COUNT(*) FROM embeddings');
        $size = $this->db->queryValue(
            'SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()'
        );
        return [
            'storage type' => 'SQLite',
            'chunks' => $items,
            'db size' => filesize_h($size)
        ];
    }

    /**
     * Method registered as SQLite callback to calculate the cosine similarity
     *
     * Returns 0.0 when either argument does not decode to an array (e.g. a NULL or
     * corrupt embedding column), so a single bad row cannot abort the whole query.
     *
     * @param string $query JSON encoded vector array
     * @param string $embedding JSON encoded vector array
     * @return float
     */
    public function sqliteCosineSimilarityCallback($query, $embedding)
    {
        $queryVector = json_decode((string)$query, true);
        $embeddingVector = json_decode((string)$embedding, true);

        if (!is_array($queryVector) || !is_array($embeddingVector)) {
            return 0.0;
        }

        return (float)$this->cosineSimilarity($queryVector, $embeddingVector);
    }

    /**
     * Calculate the cosine similarity between two vectors
     *
     * Only the keys of $queryVector are iterated. Components missing from $embedding
     * are treated as 0. When either vector has zero magnitude the similarity is
     * undefined; 0.0 is returned in that case instead of dividing by zero.
     *
     * @param float[] $queryVector The vector of the search phrase
     * @param float[] $embedding The vector of the chunk
     * @return float
     * @link https://doku.wiki/src-cosine-similarity
     */
    protected function cosineSimilarity($queryVector, $embedding)
    {
        $dotProduct = 0;
        $queryEmbeddingLength = 0;
        $embeddingLength = 0;

        foreach ($queryVector as $key => $value) {
            $other = $embedding[$key] ?? 0.0;

            $dotProduct += $value * $other;
            $queryEmbeddingLength += $value * $value;
            $embeddingLength += $other * $other;
        }

        $denominator = sqrt($queryEmbeddingLength) * sqrt($embeddingLength);
        if ($denominator == 0) {
            // empty or all-zero vector: avoid DivisionByZeroError
            return 0.0;
        }

        return $dotProduct / $denominator;
    }

    /**
     * Build a Chunk object from a row of the embeddings table
     *
     * @param array $record Row with the keys page, id, chunk, embedding (JSON) and created
     * @return Chunk
     */
    protected function recordToChunk($record)
    {
        return new Chunk(
            $record['page'],
            $record['id'],
            $record['chunk'],
            json_decode($record['embedding'], true),
            $record['created']
        );
    }
}