<?php

namespace dokuwiki\test\Search\Collection;

use dokuwiki\Search\Index\MemoryIndex;

/**
 * Tests for the frequency collection, exercised through MockFrequencyCollection
 *
 * Every test uses its own set of index names so the tests do not share index data.
 */
class FrequencyCollectionTest extends \DokuWikiTest
{

    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testDirectly()
    {
        $index = new MockFrequencyCollection('entity', 'token', 'freq', 'reverse');

        $tokens = ['one', 'two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        $idxEntity = new MemoryIndex('entity');
        $this->assertEquals('test', $idxEntity->retrieveRow(0));

        $idxToken = new MemoryIndex('token', '3');
        $this->assertEquals('one', $idxToken->retrieveRow(0));
        $this->assertEquals('two', $idxToken->retrieveRow(1));

        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // one is 1x on page 0 (written without *1)
        $this->assertEquals('0*2', $idxFreq->retrieveRow(1)); // two is 2x on page 0

        $idxRev = new MemoryIndex('reverse');
        $this->assertEquals('3*0:3*1:5*0:4*0', $idxRev->retrieveRow(0));

        // remove one of the tokens
        $tokens = ['two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        // re-open the frequency index to see what was written by the second run
        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // one is not on page 0
    }

    /**
     * Test reverse lookup
     *
     * A lookup for the page should return the word frequencies
     */
    public function testReverse()
    {
        $index = new MockFrequencyCollection('page', 'word', 'w', 'pageword');
        $index->lock();
        $index->addEntity('wiki:syntax', ['dokuwiki']);
        $index->unlock();

        // the expected result is keyed by the length of the single indexed word
        $len = strlen('dokuwiki');
        $this->assertEquals([$len => [0 => 0]], $index->getReverseAssignments('wiki:syntax'));
    }

    /**
     * resolveTokens should count frequencies and group by token length
     */
    public function testResolveTokens()
    {
        $index = new MockFrequencyCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();
        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
                ['one', 'two', 'two', 'three'],
            ]);
        } finally {
            // always give the lock back, even when the call under test throws
            $index->unlock();
        }

        // 'one' and 'two' are 3 chars, 'three' is 5 chars
        $this->assertArrayHasKey(3, $result);
        $this->assertArrayHasKey(5, $result);

        // token IDs are sequential: one=0, two=1, three=0 (in its own length group)
        $this->assertEquals(1, $result[3][0]); // 'one' appears once
        $this->assertEquals(2, $result[3][1]); // 'two' appears twice
        $this->assertEquals(1, $result[5][0]); // 'three' appears once
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockFrequencyCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();
        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);
        } finally {
            // always give the lock back, even when the call under test throws
            $index->unlock();
        }

        $this->assertEmpty($result);
    }

    /**
     * countTokens should return occurrence counts
     *
     * The collection is created without explicit index names and no lock is taken.
     */
    public function testCountTokens()
    {
        $index = new MockFrequencyCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['one', 'two', 'two', 'three', 'three', 'three'],
        ]);

        $this->assertEquals([
            'one' => 1,
            'two' => 2,
            'three' => 3,
        ], $result);
    }
}