<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Exception\IndexIntegrityException;
use dokuwiki\Search\Exception\IndexUsageException;
use dokuwiki\Search\Index\MemoryIndex;

/**
 * Tests for the frequency collection, exercised through MockFrequencyCollection
 *
 * Most tests use their own index names (prefixes like 'rt_', 'ewd_', 'cimf_'),
 * presumably to keep the data of the individual tests apart.
 */
class FrequencyCollectionTest extends \DokuWikiTest
{
    /**
     * Add entities to a collection while holding its lock
     *
     * The lock is released in a finally block, so a failing addEntity() call
     * does not leave the collection locked.
     *
     * @param MockFrequencyCollection $index the collection to write to
     * @param array $entities map of entity name => list of tokens
     */
    private function addEntities(MockFrequencyCollection $index, array $entities)
    {
        $index->lock();
        try {
            foreach ($entities as $entity => $tokens) {
                $index->addEntity($entity, $tokens);
            }
        } finally {
            $index->unlock();
        }
    }

    /**
     * Call a non-public method on a collection while holding its lock
     *
     * The lock is always released again, even when the call throws.
     *
     * @param MockFrequencyCollection $index the collection to call the method on
     * @param string $method name of the method to call
     * @param array $args arguments passed to the method
     * @return mixed whatever the called method returns
     */
    private function callLocked(MockFrequencyCollection $index, $method, array $args)
    {
        $index->lock();
        try {
            return $this->callInaccessibleMethod($index, $method, $args);
        } finally {
            $index->unlock();
        }
    }

    /**
     * Delete an index file from the configured index directory
     *
     * Fails the test when the file is not there or cannot be removed. Otherwise
     * a broken fixture would go unnoticed and the integrity tests would no
     * longer check what they claim to check.
     *
     * @param string $name index file name without directory and '.idx' extension
     */
    private function removeIndexFile($name)
    {
        global $conf;

        $file = $conf['indexdir'] . '/' . $name . '.idx';
        $this->assertFileExists($file, 'index file to remove should have been created');
        $this->assertTrue(unlink($file), 'index file should be removable');
    }

    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testDirectly()
    {
        $index = new MockFrequencyCollection('entity', 'token', 'freq', 'reverse');

        $this->addEntities($index, ['test' => ['one', 'two', 'three', 'four', 'two']]);

        $idxEntity = new MemoryIndex('entity');
        $this->assertEquals('test', $idxEntity->retrieveRow(0));

        $idxToken = new MemoryIndex('token', '3');
        $this->assertEquals('one', $idxToken->retrieveRow(0));
        $this->assertEquals('two', $idxToken->retrieveRow(1));

        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // one is 1x on page 0 (written without *1)
        $this->assertEquals('0*2', $idxFreq->retrieveRow(1)); // two is 2x on page 0

        $idxRev = new MemoryIndex('reverse');
        $this->assertEquals('3*0:3*1:5*0:4*0', $idxRev->retrieveRow(0));

        // add the same entity again, this time without the token 'one'
        $this->addEntities($index, ['test' => ['two', 'three', 'four', 'two']]);

        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // one is not on page 0
    }

    /**
     * Test reverse lookup
     *
     * A lookup for the page should return the word frequencies
     */
    public function testReverse()
    {
        $index = new MockFrequencyCollection('page', 'word', 'w', 'pageword');
        $this->addEntities($index, ['wiki:syntax' => ['dokuwiki']]);

        $len = strlen('dokuwiki');
        $this->assertEquals([$len => [0 => 0]], $index->getReverseAssignments('wiki:syntax'));
    }

    /**
     * resolveTokens should count frequencies and group by token length
     */
    public function testResolveTokens()
    {
        $index = new MockFrequencyCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');

        $result = $this->callLocked($index, 'resolveTokens', [
            ['one', 'two', 'two', 'three'],
        ]);

        // 'one' and 'two' are 3 chars, 'three' is 5 chars
        $this->assertArrayHasKey(3, $result);
        $this->assertArrayHasKey(5, $result);

        // token IDs are sequential: one=0, two=1, three=0 (in its own length group)
        $this->assertEquals(1, $result[3][0]); // 'one' appears once
        $this->assertEquals(2, $result[3][1]); // 'two' appears twice
        $this->assertEquals(1, $result[5][0]); // 'three' appears once
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockFrequencyCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');

        $result = $this->callLocked($index, 'resolveTokens', [[]]);

        $this->assertEmpty($result);
    }

    /**
     * countTokens should return occurrence counts
     */
    public function testCountTokens()
    {
        $index = new MockFrequencyCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['one', 'two', 'two', 'three', 'three', 'three'],
        ]);

        $this->assertEquals([
            'one' => 1,
            'two' => 2,
            'three' => 3,
        ], $result);
    }

    /**
     * getEntitiesWithData on a split FrequencyCollection
     */
    public function testGetEntitiesWithData()
    {
        $index = new MockFrequencyCollection('ewd_page', 'ewd_w', 'ewd_i', 'ewd_pw');
        $this->addEntities($index, [
            'page1' => ['dokuwiki', 'wiki'],
            'page2' => ['other', 'words'],
        ]);

        $result = $index->getEntitiesWithData();
        sort($result); // the test does not depend on the order of the returned entities
        $this->assertEquals(['page1', 'page2'], $result);
    }

    /**
     * getEntitiesWithData on an empty split collection returns empty array
     */
    public function testGetEntitiesWithDataEmpty()
    {
        $index = new MockFrequencyCollection('empty_page', 'empty_w', 'empty_i', 'empty_pw');
        $result = $index->getEntitiesWithData();
        $this->assertEquals([], $result);
    }

    /**
     * checkIntegrity on an empty split collection does not throw
     */
    public function testCheckIntegrityEmpty()
    {
        $index = new MockFrequencyCollection('ci_page', 'ci_w', 'ci_i', 'ci_pw');
        $index->checkIntegrity();
        $this->assertTrue(true); // no exception thrown
    }

    /**
     * checkIntegrity passes on a healthy split collection
     */
    public function testCheckIntegrityHealthy()
    {
        $index = new MockFrequencyCollection('cih_page', 'cih_w', 'cih_i', 'cih_pw');
        $this->addEntities($index, ['page1' => ['dokuwiki', 'wiki']]);

        $index->checkIntegrity(); // should not throw
        $this->assertTrue(true);
    }

    /**
     * checkIntegrity detects missing frequency index for a group
     */
    public function testCheckIntegrityMissingFreqIndex()
    {
        $index = new MockFrequencyCollection('cimf_page', 'cimf_w', 'cimf_i', 'cimf_pw');
        $this->addEntities($index, ['page1' => ['dokuwiki', 'wiki']]);

        // find a group that exists and delete its frequency index
        $max = $index->getTokenIndexMaximum();
        $this->removeIndexFile('cimf_i' . $max);

        // check with a fresh collection instance rather than the one used for writing
        $this->expectException(IndexIntegrityException::class);
        (new MockFrequencyCollection('cimf_page', 'cimf_w', 'cimf_i', 'cimf_pw'))->checkIntegrity();
    }

    /**
     * checkIntegrity detects missing token index for a group
     */
    public function testCheckIntegrityMissingTokenIndex()
    {
        $index = new MockFrequencyCollection('cimt_page', 'cimt_w', 'cimt_i', 'cimt_pw');
        // use words of different lengths to create multiple groups
        $this->addEntities($index, ['page1' => ['hi', 'dokuwiki', 'wiki']]);

        // delete the token index for the shortest group (not the max)
        $this->removeIndexFile('cimt_w2');

        // check with a fresh collection instance rather than the one used for writing
        $this->expectException(IndexIntegrityException::class);
        (new MockFrequencyCollection('cimt_page', 'cimt_w', 'cimt_i', 'cimt_pw'))->checkIntegrity();
    }

    /**
     * groupToSuffix throws on group 0 for split collection
     */
    public function testGroupToSuffixValidationSplit()
    {
        $index = new MockFrequencyCollection('gs_page', 'gs_w', 'gs_i', 'gs_pw');

        // arm the expectation only after construction, so that only the lookup may satisfy it
        $this->expectException(IndexUsageException::class);

        // split collection should reject group 0
        $index->getTokenIndex(0);
    }
}