xref: /dokuwiki/_test/tests/Search/Collection/FrequencyCollectionTest.php (revision ede4646658cf51245060332d97a319a39c788ea1)
1*ede46466SAndreas Gohr<?php
2*ede46466SAndreas Gohr
3*ede46466SAndreas Gohrnamespace dokuwiki\test\Search\Collection;
4*ede46466SAndreas Gohr
5*ede46466SAndreas Gohruse dokuwiki\Search\Index\MemoryIndex;
6*ede46466SAndreas Gohr
/**
 * Tests for the frequency collection, exercised through MockFrequencyCollection
 *
 * Every test that takes the collection lock also releases it again, so no
 * lock outlives the test that acquired it.
 */
class FrequencyCollectionTest extends \DokuWikiTest
{

    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testDirectly()
    {
        $index = new MockFrequencyCollection('entity', 'token', 'freq', 'reverse');

        // 'two' is given twice on purpose so a frequency above one is recorded
        $tokens = ['one', 'two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        $idxEntity = new MemoryIndex('entity');
        $this->assertEquals('test', $idxEntity->retrieveRow(0));

        // the token and frequency indexes are opened with the suffix '3' (the 3 character tokens)
        $idxToken = new MemoryIndex('token', '3');
        $this->assertEquals('one', $idxToken->retrieveRow(0));
        $this->assertEquals('two', $idxToken->retrieveRow(1));

        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // one is 1x on page 0 (written without *1)
        $this->assertEquals('0*2', $idxFreq->retrieveRow(1)); // two is 2x on page 0

        // NOTE(review): entries look like "<token length>*<token id>" joined by ':' - confirm
        $idxRev = new MemoryIndex('reverse');
        $this->assertEquals('3*0:3*1:5*0:4*0', $idxRev->retrieveRow(0));

        // remove one of the tokens
        $tokens = ['two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        // re-open the index to see the state after the second write
        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // one is not on page 0
    }

    /**
     * Test reverse lookup
     *
     * A lookup for the page should return the reverse assignments, which are
     * grouped by token length
     */
    public function testReverse()
    {
        $index = new MockFrequencyCollection('page', 'word', 'w', 'pageword');
        $index->lock();
        $index->addEntity('wiki:syntax', ['dokuwiki']);
        $index->unlock();

        // the outer key of the expected result is the length of the only token
        $len = strlen('dokuwiki');
        $this->assertEquals([$len => [0 => 0]], $index->getReverseAssignments('wiki:syntax'));
    }

    /**
     * resolveTokens should count frequencies and group by token length
     */
    public function testResolveTokens()
    {
        $index = new MockFrequencyCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
                ['one', 'two', 'two', 'three'],
            ]);
        } finally {
            // always release the lock, even when the call above throws
            $index->unlock();
        }

        // 'one' and 'two' are 3 chars, 'three' is 5 chars
        $this->assertArrayHasKey(3, $result);
        $this->assertArrayHasKey(5, $result);

        // token IDs are sequential: one=0, two=1, three=0 (in its own length group)
        $this->assertEquals(1, $result[3][0]); // 'one' appears once
        $this->assertEquals(2, $result[3][1]); // 'two' appears twice
        $this->assertEquals(1, $result[5][0]); // 'three' appears once
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockFrequencyCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);
        } finally {
            // always release the lock, even when the call above throws
            $index->unlock();
        }

        $this->assertEmpty($result);
    }

    /**
     * countTokens should return occurrence counts
     *
     * No lock is taken here; the collection is created with its default arguments.
     */
    public function testCountTokens()
    {
        $index = new MockFrequencyCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['one', 'two', 'two', 'three', 'three', 'three'],
        ]);

        // the result maps each distinct token to the number of times it was passed in
        $this->assertEquals([
            'one' => 1,
            'two' => 2,
            'three' => 3,
        ], $result);
    }
}
115