<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Index\MemoryIndex;

/**
 * Tests for the frequency collection, exercised through MockFrequencyCollection.
 *
 * Each test uses its own set of index names so that data written by one test
 * cannot be picked up by another.
 */
class FrequencyCollectionTest extends \DokuWikiTest
{
    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testDirectly()
    {
        $index = new MockFrequencyCollection('entity', 'token', 'freq', 'reverse');

        // 'two' is listed twice on purpose to produce a frequency of 2
        $tokens = ['one', 'two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        // the entity name is stored in the first row of the entity index
        $idxEntity = new MemoryIndex('entity');
        $this->assertEquals('test', $idxEntity->retrieveRow(0));

        // token and frequency indexes are inspected for the 3 character group
        $idxToken = new MemoryIndex('token', '3');
        $this->assertEquals('one', $idxToken->retrieveRow(0));
        $this->assertEquals('two', $idxToken->retrieveRow(1));

        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // one is 1x on page 0 (written without *1)
        $this->assertEquals('0*2', $idxFreq->retrieveRow(1)); // two is 2x on page 0

        // NOTE(review): the entries look like "<token length>*<token id>" joined by ':',
        // i.e. one=3*0, two=3*1, three=5*0, four=4*0 - confirm against the collection
        $idxRev = new MemoryIndex('reverse');
        $this->assertEquals('3*0:3*1:5*0:4*0', $idxRev->retrieveRow(0));

        // remove one of the tokens
        $tokens = ['two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        // re-read the frequency index to see the state after the update
        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // one is not on page 0
    }

    /**
     * Test reverse lookup
     *
     * A lookup for the page should return the word frequencies
     */
    public function testReverse()
    {
        $index = new MockFrequencyCollection('page', 'word', 'w', 'pageword');
        $index->lock();
        $index->addEntity('wiki:syntax', ['dokuwiki']);
        $index->unlock();

        // the expected result is keyed by the token length first
        $len = strlen('dokuwiki');
        $this->assertEquals([$len => [0 => 0]], $index->getReverseAssignments('wiki:syntax'));
    }

    /**
     * resolveTokens should count frequencies and group by token length
     */
    public function testResolveTokens()
    {
        $index = new MockFrequencyCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
                ['one', 'two', 'two', 'three'],
            ]);
        } finally {
            // always release the lock, even when the call under test throws
            $index->unlock();
        }

        // 'one' and 'two' are 3 chars, 'three' is 5 chars
        $this->assertArrayHasKey(3, $result);
        $this->assertArrayHasKey(5, $result);

        // token IDs are sequential: one=0, two=1, three=0 (in its own length group)
        $this->assertEquals(1, $result[3][0]); // 'one' appears once
        $this->assertEquals(2, $result[3][1]); // 'two' appears twice
        $this->assertEquals(1, $result[5][0]); // 'three' appears once
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockFrequencyCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);
        } finally {
            // always release the lock, even when the call under test throws
            $index->unlock();
        }

        $this->assertEmpty($result);
    }

    /**
     * countTokens should return occurrence counts
     */
    public function testCountTokens()
    {
        // no lock is taken here, the test only looks at the returned counts
        $index = new MockFrequencyCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['one', 'two', 'two', 'three', 'three', 'three'],
        ]);

        $this->assertEquals([
            'one' => 1,
            'two' => 2,
            'three' => 3,
        ], $result);
    }

    /**
     * getEntitiesWithData on a split FrequencyCollection
     */
    public function testGetEntitiesWithData()
    {
        $index = new MockFrequencyCollection('ewd_page', 'ewd_w', 'ewd_i', 'ewd_pw');
        $index->lock();
        $index->addEntity('page1', ['dokuwiki', 'wiki']);
        $index->addEntity('page2', ['other', 'words']);
        $index->unlock();

        // sort so the assertion does not depend on the order of the returned entities
        $result = $index->getEntitiesWithData();
        sort($result);
        $this->assertEquals(['page1', 'page2'], $result);
    }

    /**
     * groupToSuffix throws on group 0 for split collection
     */
    public function testGroupToSuffixValidationSplit()
    {
        $index = new MockFrequencyCollection('gs_page', 'gs_w', 'gs_i', 'gs_pw');

        // arm the expectation only now, so an exception thrown while constructing
        // the collection cannot satisfy it by accident
        $this->expectException(\dokuwiki\Search\Exception\IndexUsageException::class);

        // split collection should reject group 0
        $index->getTokenIndex(0);
    }
}