<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexUsageException;
use dokuwiki\Search\Index\MemoryIndex;

/**
 * Tests for lookup style collections
 *
 * Most tests write through a MockLookupCollection (or a PageMetaCollection) and then
 * inspect the resulting rows by opening the named indexes directly via MemoryIndex.
 * Each test uses its own index name prefix so the tests do not share index data.
 */
class LookupCollectionTest extends \DokuWikiTest
{
    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testAddEntity()
    {
        $index = new MockLookupCollection('a_entity', 'a_token', 'a_freq', 'a_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:icon.svg']);
        $index->unlock();

        // check entity index
        $idxEntity = new MemoryIndex('a_entity');
        $this->assertEquals('wiki:start', $idxEntity->retrieveRow(0));

        // check token index (single file, no suffix)
        $idxToken = new MemoryIndex('a_token');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));
        $this->assertEquals('wiki:icon.svg', $idxToken->retrieveRow(2));

        // check frequency index — all frequencies are 1 (written without *1)
        $idxFreq = new MemoryIndex('a_freq');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // entity 0 with implicit freq 1
        $this->assertEquals('0', $idxFreq->retrieveRow(1));
        $this->assertEquals('0', $idxFreq->retrieveRow(2));

        // check reverse index
        $idxRev = new MemoryIndex('a_reverse');
        $this->assertEquals('0:1:2', $idxRev->retrieveRow(0));
    }

    /**
     * Duplicate tokens should be deduplicated
     */
    public function testAddEntityDedup()
    {
        $index = new MockLookupCollection('b_entity', 'b_token', 'b_freq', 'b_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        $idxToken = new MemoryIndex('b_token');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));

        $idxRev = new MemoryIndex('b_reverse');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));
    }

    /**
     * Updating an entity should remove old tokens and add new ones
     */
    public function testUpdateEntity()
    {
        $index = new MockLookupCollection('c_entity', 'c_token', 'c_freq', 'c_reverse');

        // initial add
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        // update: remove logo, keep banner, add icon
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:banner.jpg', 'wiki:icon.svg']);
        $index->unlock();

        // logo should be removed from frequency index
        $idxFreq = new MemoryIndex('c_freq');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // logo removed
        $this->assertEquals('0', $idxFreq->retrieveRow(1)); // banner still on entity 0
        $this->assertEquals('0', $idxFreq->retrieveRow(2)); // icon added on entity 0

        // reverse index should only have banner and icon
        $idxRev = new MemoryIndex('c_reverse');
        $this->assertEquals('1:2', $idxRev->retrieveRow(0));
    }

    /**
     * Test reverse assignments returns two-level structure with group key 0
     */
    public function testReverseAssignments()
    {
        $index = new MockLookupCollection('d_entity', 'd_token', 'd_freq', 'd_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        $result = $index->getReverseAssignments('wiki:start');
        $this->assertEquals([0 => [0 => 0, 1 => 0]], $result);
    }

    /**
     * Adding entity without lock should throw exception
     */
    public function testAddEntityWithoutLock()
    {
        // the expectation has to be registered before the throwing call
        $this->expectException(IndexLockException::class);

        $index = new MockLookupCollection();
        $index->addEntity('wiki:start', ['wiki:logo.png']);
    }

    /**
     * Adding empty token list should clear entity from indexes
     */
    public function testEmptyTokens()
    {
        $index = new MockLookupCollection('f_entity', 'f_token', 'f_freq', 'f_reverse');

        // add some tokens first
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png']);
        $index->unlock();

        // now clear
        $index->lock();
        $index->addEntity('wiki:start', []);
        $index->unlock();

        // frequency index should be empty for this token
        $idxFreq = new MemoryIndex('f_freq');
        $this->assertEquals('', $idxFreq->retrieveRow(0));

        // reverse index should be empty
        $idxRev = new MemoryIndex('f_reverse');
        $this->assertEquals('', $idxRev->retrieveRow(0));
    }

    /**
     * Test that PageMetaCollection('relation_media') uses correct index names
     */
    public function testMediaCollection()
    {
        $index = new PageMetaCollection('relation_media');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        $idxToken = new MemoryIndex('relation_media_w');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));

        $idxRev = new MemoryIndex('relation_media_p');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));
    }

    /**
     * Test that PageMetaCollection('relation_references') uses correct index names
     */
    public function testReferencesCollection()
    {
        $index = new PageMetaCollection('relation_references');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->unlock();

        $idxToken = new MemoryIndex('relation_references_w');
        $this->assertEquals('wiki:syntax', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:welcome', $idxToken->retrieveRow(1));

        $idxRev = new MemoryIndex('relation_references_p');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));

        $result = $index->getReverseAssignments('wiki:start');
        $this->assertEquals([0 => [0 => 0, 1 => 0]], $result);
    }

    /**
     * resolveTokens should deduplicate and assign frequency 1 under group 0
     */
    public function testResolveTokens()
    {
        $index = new MockLookupCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
                ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:logo.png'],
            ]);
        } finally {
            // always release the lock so it cannot outlive this test
            $index->unlock();
        }

        // all tokens under group 0 (non-split)
        $this->assertArrayHasKey(0, $result);
        $this->assertCount(2, $result[0]); // deduplicated

        // token IDs are sequential: logo=0, banner=1
        $this->assertEquals(1, $result[0][0]); // logo freq=1
        $this->assertEquals(1, $result[0][1]); // banner freq=1
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockLookupCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);
        } finally {
            // always release the lock so it cannot outlive this test
            $index->unlock();
        }

        $this->assertEmpty($result);
    }

    /**
     * countTokens should deduplicate and assign frequency 1
     */
    public function testCountTokens()
    {
        $index = new MockLookupCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:logo.png'],
        ]);

        $this->assertEquals([
            'wiki:logo.png' => 1,
            'wiki:banner.jpg' => 1,
        ], $result);
    }

    /**
     * getEntitiesWithData returns entities that have frequency data
     */
    public function testGetEntitiesWithData()
    {
        $index = new MockLookupCollection('ewd_entity', 'ewd_token', 'ewd_freq', 'ewd_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->addEntity('wiki:other', ['wiki:syntax']);
        $index->addEntity('wiki:empty', []);
        $index->unlock();

        $result = $index->getEntitiesWithData();
        // sort so the comparison does not depend on the order of the returned entities
        sort($result);
        $this->assertEquals(['wiki:other', 'wiki:start'], $result);
    }

    /**
     * resolveTokenFrequencies returns entity frequencies for given token IDs
     */
    public function testResolveTokenFrequencies()
    {
        $index = new MockLookupCollection('rtf_entity', 'rtf_token', 'rtf_freq', 'rtf_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->addEntity('wiki:other', ['wiki:syntax']);
        $index->unlock();

        // token ID 0 = wiki:syntax, referenced by both entities
        $result = $index->resolveTokenFrequencies(0, [0]);
        $this->assertArrayHasKey(0, $result);
        $this->assertCount(2, $result[0]); // two entities have this token
    }

    /**
     * groupToSuffix throws on non-0 group for non-split collection
     */
    public function testGroupToSuffixValidation()
    {
        // the expectation has to be registered before the throwing call
        $this->expectException(IndexUsageException::class);

        $index = new MockLookupCollection('gs_entity', 'gs_token', 'gs_freq', 'gs_reverse');
        // non-split collection should reject group 5
        $index->getTokenIndex(5);
    }
}