<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Index\MemoryIndex;

/**
 * Tests for the frequency collection, exercised through MockFrequencyCollection
 *
 * Every test that takes the collection lock releases it again before finishing,
 * so no lock is left behind for later tests.
 */
class FrequencyCollectionTest extends \DokuWikiTest
{

    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testDirectly()
    {
        $index = new MockFrequencyCollection('entity', 'token', 'freq', 'reverse');

        // 'two' is listed twice on purpose to get a frequency above one
        $tokens = ['one', 'two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        // the indexes are re-opened by name to inspect what has been stored
        $idxEntity = new MemoryIndex('entity');
        $this->assertEquals('test', $idxEntity->retrieveRow(0));

        // suffix '3' selects the index holding the three letter tokens
        $idxToken = new MemoryIndex('token', '3');
        $this->assertEquals('one', $idxToken->retrieveRow(0));
        $this->assertEquals('two', $idxToken->retrieveRow(1));

        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // one is 1x on page 0 (written without *1)
        $this->assertEquals('0*2', $idxFreq->retrieveRow(1)); // two is 2x on page 0

        // one reverse entry per distinct token: one, two, three, four
        $idxRev = new MemoryIndex('reverse');
        $this->assertEquals('3*0:3*1:5*0:4*0', $idxRev->retrieveRow(0));

        // remove one of the tokens
        $tokens = ['two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // one is not on page 0
    }

    /**
     * Test reverse lookup
     *
     * A lookup for the page should return the word frequencies
     */
    public function testReverse()
    {
        $index = new MockFrequencyCollection('page', 'word', 'w', 'pageword');
        $index->lock();
        $index->addEntity('wiki:syntax', ['dokuwiki']);
        $index->unlock();

        // the result is expected to be keyed by the token length first
        $len = strlen('dokuwiki');
        $this->assertEquals([$len => [0 => 0]], $index->getReverseAssignments('wiki:syntax'));
    }

    /**
     * resolveTokens should count frequencies and group by token length
     */
    public function testResolveTokens()
    {
        $index = new MockFrequencyCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();

        $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
            ['one', 'two', 'two', 'three'],
        ]);

        // release the lock again, as the other tests in this class do
        $index->unlock();

        // 'one' and 'two' are 3 chars, 'three' is 5 chars
        $this->assertArrayHasKey(3, $result);
        $this->assertArrayHasKey(5, $result);

        // token IDs are sequential: one=0, two=1, three=0 (in its own length group)
        $this->assertEquals(1, $result[3][0]); // 'one' appears once
        $this->assertEquals(2, $result[3][1]); // 'two' appears twice
        $this->assertEquals(1, $result[5][0]); // 'three' appears once
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockFrequencyCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();

        $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);

        // release the lock again, as the other tests in this class do
        $index->unlock();

        $this->assertEmpty($result);
    }

    /**
     * countTokens should return occurrence counts
     */
    public function testCountTokens()
    {
        // no lock is taken here; only the counting helper is called
        $index = new MockFrequencyCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['one', 'two', 'two', 'three', 'three', 'three'],
        ]);

        $this->assertEquals([
            'one' => 1,
            'two' => 2,
            'three' => 3,
        ], $result);
    }
}