<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Exception\IndexIntegrityException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexUsageException;
use dokuwiki\Search\Index\MemoryIndex;

/**
 * Tests for non-split lookup collections
 *
 * Most tests write through a MockLookupCollection (or a PageMetaCollection) and then
 * re-open the named indexes with MemoryIndex to verify the stored rows directly.
 * Every test uses its own index name prefix so the tests do not see each other's data.
 */
class LookupCollectionTest extends \DokuWikiTest
{
    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testAddEntity()
    {
        $index = new MockLookupCollection('a_entity', 'a_token', 'a_freq', 'a_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:icon.svg']);
        $index->unlock();

        // check entity index
        $idxEntity = new MemoryIndex('a_entity');
        $this->assertEquals('wiki:start', $idxEntity->retrieveRow(0));

        // check token index (single file, no suffix)
        $idxToken = new MemoryIndex('a_token');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));
        $this->assertEquals('wiki:icon.svg', $idxToken->retrieveRow(2));

        // check frequency index — all frequencies are 1 (written without *1)
        $idxFreq = new MemoryIndex('a_freq');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // entity 0 with implicit freq 1
        $this->assertEquals('0', $idxFreq->retrieveRow(1));
        $this->assertEquals('0', $idxFreq->retrieveRow(2));

        // check reverse index
        $idxRev = new MemoryIndex('a_reverse');
        $this->assertEquals('0:1:2', $idxRev->retrieveRow(0));
    }

    /**
     * Duplicate tokens should be deduplicated
     */
    public function testAddEntityDedup()
    {
        $index = new MockLookupCollection('b_entity', 'b_token', 'b_freq', 'b_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        $idxToken = new MemoryIndex('b_token');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));

        $idxRev = new MemoryIndex('b_reverse');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));
    }

    /**
     * Updating an entity should remove old tokens and add new ones
     */
    public function testUpdateEntity()
    {
        $index = new MockLookupCollection('c_entity', 'c_token', 'c_freq', 'c_reverse');

        // initial add
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        // update: remove logo, keep banner, add icon
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:banner.jpg', 'wiki:icon.svg']);
        $index->unlock();

        // logo should be removed from frequency index
        $idxFreq = new MemoryIndex('c_freq');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // logo removed
        $this->assertEquals('0', $idxFreq->retrieveRow(1)); // banner still on entity 0
        $this->assertEquals('0', $idxFreq->retrieveRow(2)); // icon added on entity 0

        // reverse index should only have banner and icon
        $idxRev = new MemoryIndex('c_reverse');
        $this->assertEquals('1:2', $idxRev->retrieveRow(0));
    }

    /**
     * Test reverse assignments returns two-level structure under group key 0
     */
    public function testReverseAssignments()
    {
        $index = new MockLookupCollection('d_entity', 'd_token', 'd_freq', 'd_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        $result = $index->getReverseAssignments('wiki:start');
        $this->assertEquals([0 => [0 => 0, 1 => 0]], $result);
    }

    /**
     * Adding entity without lock should throw exception
     */
    public function testAddEntityWithoutLock()
    {
        $this->expectException(IndexLockException::class);

        $index = new MockLookupCollection();
        $index->addEntity('wiki:start', ['wiki:logo.png']);
    }

    /**
     * Adding empty token list should clear entity from indexes
     */
    public function testEmptyTokens()
    {
        $index = new MockLookupCollection('f_entity', 'f_token', 'f_freq', 'f_reverse');

        // add some tokens first
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png']);
        $index->unlock();

        // now clear
        $index->lock();
        $index->addEntity('wiki:start', []);
        $index->unlock();

        // frequency index should be empty for this token
        $idxFreq = new MemoryIndex('f_freq');
        $this->assertEquals('', $idxFreq->retrieveRow(0));

        // reverse index should be empty
        $idxRev = new MemoryIndex('f_reverse');
        $this->assertEquals('', $idxRev->retrieveRow(0));
    }

    /**
     * Test that PageMetaCollection('relation_media') uses correct index names
     */
    public function testMediaCollection()
    {
        $index = new PageMetaCollection('relation_media');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        $idxToken = new MemoryIndex('relation_media_w');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));

        $idxRev = new MemoryIndex('relation_media_p');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));
    }

    /**
     * Test that PageMetaCollection('relation_references') uses correct index names
     */
    public function testReferencesCollection()
    {
        $index = new PageMetaCollection('relation_references');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->unlock();

        $idxToken = new MemoryIndex('relation_references_w');
        $this->assertEquals('wiki:syntax', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:welcome', $idxToken->retrieveRow(1));

        $idxRev = new MemoryIndex('relation_references_p');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));

        $result = $index->getReverseAssignments('wiki:start');
        $this->assertEquals([0 => [0 => 0, 1 => 0]], $result);
    }

    /**
     * resolveTokens should deduplicate and assign frequency 1 under group 0
     */
    public function testResolveTokens()
    {
        $index = new MockLookupCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();

        $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
            ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:logo.png'],
        ]);

        // release the lock again, like every other test does, so it is not left held
        $index->unlock();

        // all tokens under group 0 (non-split)
        $this->assertArrayHasKey(0, $result);
        $this->assertCount(2, $result[0]); // deduplicated

        // token IDs are sequential: logo=0, banner=1
        $this->assertEquals(1, $result[0][0]); // logo freq=1
        $this->assertEquals(1, $result[0][1]); // banner freq=1
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockLookupCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();

        $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);

        // release the lock again, like every other test does, so it is not left held
        $index->unlock();

        $this->assertEmpty($result);
    }

    /**
     * countTokens should deduplicate and assign frequency 1
     */
    public function testCountTokens()
    {
        $index = new MockLookupCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:logo.png'],
        ]);

        $this->assertEquals([
            'wiki:logo.png' => 1,
            'wiki:banner.jpg' => 1,
        ], $result);
    }

    /**
     * getEntitiesWithData returns entities that have frequency data
     */
    public function testGetEntitiesWithData()
    {
        $index = new MockLookupCollection('ewd_entity', 'ewd_token', 'ewd_freq', 'ewd_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->addEntity('wiki:other', ['wiki:syntax']);
        $index->addEntity('wiki:empty', []);
        $index->unlock();

        $result = $index->getEntitiesWithData();
        sort($result); // the assertion below should not depend on the returned order
        $this->assertEquals(['wiki:other', 'wiki:start'], $result);
    }

    /**
     * resolveTokenFrequencies returns entity frequencies for given token IDs
     */
    public function testResolveTokenFrequencies()
    {
        $index = new MockLookupCollection('rtf_entity', 'rtf_token', 'rtf_freq', 'rtf_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->addEntity('wiki:other', ['wiki:syntax']);
        $index->unlock();

        // token ID 0 = wiki:syntax, referenced by both entities
        $result = $index->resolveTokenFrequencies(0, [0]);
        $this->assertArrayHasKey(0, $result);
        $this->assertCount(2, $result[0]); // two entities have this token
    }

    /**
     * checkIntegrity passes on a healthy non-split collection
     */
    public function testCheckIntegrityHealthy()
    {
        $index = new MockLookupCollection('cih_entity', 'cih_token', 'cih_freq', 'cih_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax']);
        $index->unlock();

        $index->checkIntegrity(); // should not throw
        $this->assertTrue(true); // reaching this line is the actual check
    }

    /**
     * checkIntegrity passes on an empty non-split collection
     */
    public function testCheckIntegrityEmpty()
    {
        $index = new MockLookupCollection('cie_entity', 'cie_token', 'cie_freq', 'cie_reverse');
        $index->checkIntegrity(); // should not throw
        $this->assertTrue(true); // reaching this line is the actual check
    }

    /**
     * checkIntegrity detects token/frequency mismatch on non-split collection
     */
    public function testCheckIntegrityTokenFreqMismatch()
    {
        global $conf;
        $index = new MockLookupCollection('cim_entity', 'cim_token', 'cim_freq', 'cim_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax']);
        $index->unlock();

        // corrupt: add extra line to token index
        $written = file_put_contents($conf['indexdir'] . '/cim_token.idx', "extra\n", FILE_APPEND);
        $this->assertNotFalse($written, 'token index could not be corrupted for the test');

        // use a fresh collection instance for the check
        $this->expectException(IndexIntegrityException::class);
        (new MockLookupCollection('cim_entity', 'cim_token', 'cim_freq', 'cim_reverse'))->checkIntegrity();
    }

    /**
     * checkIntegrity detects entity/reverse mismatch on non-split collection
     */
    public function testCheckIntegrityEntityReverseMismatch()
    {
        global $conf;
        $index = new MockLookupCollection('cir_entity', 'cir_token', 'cir_freq', 'cir_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax']);
        $index->unlock();

        // corrupt: add extra line to reverse index
        $written = file_put_contents($conf['indexdir'] . '/cir_reverse.idx', "0\n", FILE_APPEND);
        $this->assertNotFalse($written, 'reverse index could not be corrupted for the test');

        // use a fresh collection instance for the check
        $this->expectException(IndexIntegrityException::class);
        (new MockLookupCollection('cir_entity', 'cir_token', 'cir_freq', 'cir_reverse'))->checkIntegrity();
    }

    /**
     * checkIntegrity detects missing frequency index when token index exists
     */
    public function testCheckIntegrityMissingFreqIndex()
    {
        global $conf;
        $index = new MockLookupCollection('cimf_entity', 'cimf_token', 'cimf_freq', 'cimf_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax']);
        $index->unlock();

        // corrupt: delete frequency index. The deletion is asserted instead of silenced,
        // otherwise a failed corruption step could go unnoticed
        $freqFile = $conf['indexdir'] . '/cimf_freq.idx';
        $this->assertFileExists($freqFile);
        $this->assertTrue(unlink($freqFile), 'frequency index could not be deleted for the test');

        // use a fresh collection instance for the check
        $this->expectException(IndexIntegrityException::class);
        (new MockLookupCollection('cimf_entity', 'cimf_token', 'cimf_freq', 'cimf_reverse'))->checkIntegrity();
    }

    /**
     * groupToSuffix throws on non-0 group for non-split collection
     */
    public function testGroupToSuffixValidation()
    {
        $this->expectException(IndexUsageException::class);

        $index = new MockLookupCollection('gs_entity', 'gs_token', 'gs_freq', 'gs_reverse');
        // non-split collection should reject group 5
        $index->getTokenIndex(5);
    }
}