xref: /dokuwiki/_test/tests/Search/Collection/FrequencyCollectionTest.php (revision 6734bb8cef71e8b4af23e627d4db5430304d55a2)
1ede46466SAndreas Gohr<?php
2ede46466SAndreas Gohr
3ede46466SAndreas Gohrnamespace dokuwiki\test\Search\Collection;
4ede46466SAndreas Gohr
5ede46466SAndreas Gohruse dokuwiki\Search\Index\MemoryIndex;
6ede46466SAndreas Gohr
/**
 * Tests for frequency collections, exercised through MockFrequencyCollection
 *
 * The tests that write index data each pass their own index names to the
 * collection (presumably so that data written by one test cannot influence
 * another one).
 */
class FrequencyCollectionTest extends \DokuWikiTest
{

    /**
     * Add data and directly check the underlying indexes for correctness
     *
     * The same entity is written twice: first with the token 'one', then
     * without it. The second write has to clear the frequency record of the
     * removed token.
     */
    public function testDirectly()
    {
        $index = new MockFrequencyCollection('entity', 'token', 'freq', 'reverse');

        $tokens = ['one', 'two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        // the entity itself is stored in the first row of the entity index
        $idxEntity = new MemoryIndex('entity');
        $this->assertEquals('test', $idxEntity->retrieveRow(0));

        // the '3' suffix addresses the index holding the three letter tokens
        $idxToken = new MemoryIndex('token', '3');
        $this->assertEquals('one', $idxToken->retrieveRow(0));
        $this->assertEquals('two', $idxToken->retrieveRow(1));

        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // one is 1x on page 0 (written without *1)
        $this->assertEquals('0*2', $idxFreq->retrieveRow(1)); // two is 2x on page 0

        // reverse record of page 0; the entries look like <token length>*<token id>
        // for one, two, three and four - TODO confirm format against the collection
        $idxRev = new MemoryIndex('reverse');
        $this->assertEquals('3*0:3*1:5*0:4*0', $idxRev->retrieveRow(0));

        // remove one of the tokens
        $tokens = ['two', 'three', 'four', 'two'];
        $index->lock();
        $index->addEntity('test', $tokens);
        $index->unlock();

        // read the frequency index again to see the state after the second write
        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // one is not on page 0
    }

    /**
     * Test reverse lookup
     *
     * A lookup for the page should return the word frequencies
     */
    public function testReverse()
    {
        $index = new MockFrequencyCollection('page', 'word', 'w', 'pageword');
        $index->lock();
        $index->addEntity('wiki:syntax', ['dokuwiki']);
        $index->unlock();

        // the result is expected to be keyed by the length of the single token
        $len = strlen('dokuwiki');
        $this->assertEquals([$len => [0 => 0]], $index->getReverseAssignments('wiki:syntax'));
    }

    /**
     * resolveTokens should count frequencies and group by token length
     */
    public function testResolveTokens()
    {
        $index = new MockFrequencyCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
                ['one', 'two', 'two', 'three'],
            ]);
        } finally {
            // always release the lock again, even when the call above fails
            $index->unlock();
        }

        // 'one' and 'two' are 3 chars, 'three' is 5 chars
        $this->assertArrayHasKey(3, $result);
        $this->assertArrayHasKey(5, $result);

        // token IDs are sequential: one=0, two=1, three=0 (in its own length group)
        $this->assertEquals(1, $result[3][0]); // 'one' appears once
        $this->assertEquals(2, $result[3][1]); // 'two' appears twice
        $this->assertEquals(1, $result[5][0]); // 'three' appears once
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockFrequencyCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);
        } finally {
            // always release the lock again, even when the call above fails
            $index->unlock();
        }

        $this->assertEmpty($result);
    }

    /**
     * countTokens should return occurrence counts
     *
     * No lock is taken and no index names are passed here; the test only
     * inspects the returned token => count map.
     */
    public function testCountTokens()
    {
        $index = new MockFrequencyCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['one', 'two', 'two', 'three', 'three', 'three'],
        ]);

        $this->assertEquals([
            'one' => 1,
            'two' => 2,
            'three' => 3,
        ], $result);
    }

    /**
     * getEntitiesWithData on a split FrequencyCollection
     *
     * Both entities that received tokens have to be reported.
     */
    public function testGetEntitiesWithData()
    {
        $index = new MockFrequencyCollection('ewd_page', 'ewd_w', 'ewd_i', 'ewd_pw');
        $index->lock();
        $index->addEntity('page1', ['dokuwiki', 'wiki']);
        $index->addEntity('page2', ['other', 'words']);
        $index->unlock();

        $result = $index->getEntitiesWithData();
        // sort first so the assertion does not depend on the order of the result
        sort($result);
        $this->assertEquals(['page1', 'page2'], $result);
    }

    /**
     * groupToSuffix throws on group 0 for split collection
     */
    public function testGroupToSuffixValidationSplit()
    {
        $index = new MockFrequencyCollection('gs_page', 'gs_w', 'gs_i', 'gs_pw');

        // register the expectation only now, so that an exception thrown while
        // constructing the collection cannot satisfy it by accident
        $this->expectException(\dokuwiki\Search\Exception\IndexUsageException::class);

        // split collection should reject group 0
        $index->getTokenIndex(0);
    }
}
143