xref: /dokuwiki/_test/tests/Search/Collection/FrequencyCollectionTest.php (revision 6734bb8cef71e8b4af23e627d4db5430304d55a2)
1<?php
2
3namespace dokuwiki\test\Search\Collection;
4
5use dokuwiki\Search\Index\MemoryIndex;
6
/**
 * Tests for the frequency collection, exercised through MockFrequencyCollection
 *
 * Every test that takes the collection lock releases it again in a finally block,
 * so neither a failing assertion nor an exception can leave a lock behind.
 */
class FrequencyCollectionTest extends \DokuWikiTest
{

    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testDirectly()
    {
        $index = new MockFrequencyCollection('entity', 'token', 'freq', 'reverse');

        $tokens = ['one', 'two', 'three', 'four', 'two'];
        $index->lock();
        try {
            $index->addEntity('test', $tokens);
        } finally {
            $index->unlock();
        }

        $idxEntity = new MemoryIndex('entity');
        $this->assertEquals('test', $idxEntity->retrieveRow(0));

        // suffix '3' addresses the token/frequency indexes holding the three letter tokens
        $idxToken = new MemoryIndex('token', '3');
        $this->assertEquals('one', $idxToken->retrieveRow(0));
        $this->assertEquals('two', $idxToken->retrieveRow(1));

        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // one is 1x on page 0 (written without *1)
        $this->assertEquals('0*2', $idxFreq->retrieveRow(1)); // two is 2x on page 0

        $idxRev = new MemoryIndex('reverse');
        $this->assertEquals('3*0:3*1:5*0:4*0', $idxRev->retrieveRow(0));

        // remove one of the tokens by adding the same entity again without it
        $tokens = ['two', 'three', 'four', 'two'];
        $index->lock();
        try {
            $index->addEntity('test', $tokens);
        } finally {
            $index->unlock();
        }

        // a fresh index object is created to inspect the state after the update
        $idxFreq = new MemoryIndex('freq', '3');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // one is not on page 0
    }

    /**
     * Test reverse lookup
     *
     * A lookup for the page should return the word frequencies
     */
    public function testReverse()
    {
        $index = new MockFrequencyCollection('page', 'word', 'w', 'pageword');
        $index->lock();
        try {
            $index->addEntity('wiki:syntax', ['dokuwiki']);
        } finally {
            $index->unlock();
        }

        // the expected result is keyed by the length of the single indexed token
        $len = strlen('dokuwiki');
        $this->assertEquals([$len => [0 => 0]], $index->getReverseAssignments('wiki:syntax'));
    }

    /**
     * resolveTokens should count frequencies and group by token length
     */
    public function testResolveTokens()
    {
        $index = new MockFrequencyCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();
        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
                ['one', 'two', 'two', 'three'],
            ]);
        } finally {
            // release the lock before asserting, a failed assertion must not leak it
            $index->unlock();
        }

        // 'one' and 'two' are 3 chars, 'three' is 5 chars
        $this->assertArrayHasKey(3, $result);
        $this->assertArrayHasKey(5, $result);

        // token IDs are sequential: one=0, two=1, three=0 (in its own length group)
        $this->assertEquals(1, $result[3][0]); // 'one' appears once
        $this->assertEquals(2, $result[3][1]); // 'two' appears twice
        $this->assertEquals(1, $result[5][0]); // 'three' appears once
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockFrequencyCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();
        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);
        } finally {
            // release the lock before asserting, a failed assertion must not leak it
            $index->unlock();
        }

        $this->assertEmpty($result);
    }

    /**
     * countTokens should return occurrence counts
     */
    public function testCountTokens()
    {
        // no lock is taken here, the method is called on a collection built with its defaults
        $index = new MockFrequencyCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['one', 'two', 'two', 'three', 'three', 'three'],
        ]);

        $this->assertEquals([
            'one' => 1,
            'two' => 2,
            'three' => 3,
        ], $result);
    }

    /**
     * getEntitiesWithData on a split FrequencyCollection
     */
    public function testGetEntitiesWithData()
    {
        $index = new MockFrequencyCollection('ewd_page', 'ewd_w', 'ewd_i', 'ewd_pw');
        $index->lock();
        try {
            $index->addEntity('page1', ['dokuwiki', 'wiki']);
            $index->addEntity('page2', ['other', 'words']);
        } finally {
            $index->unlock();
        }

        // the result is sorted first so the comparison does not depend on the returned order
        $result = $index->getEntitiesWithData();
        sort($result);
        $this->assertEquals(['page1', 'page2'], $result);
    }

    /**
     * groupToSuffix throws on group 0 for split collection
     */
    public function testGroupToSuffixValidationSplit()
    {
        // the expectation has to be registered before the call that is supposed to throw
        $this->expectException(\dokuwiki\Search\Exception\IndexUsageException::class);

        $index = new MockFrequencyCollection('gs_page', 'gs_w', 'gs_i', 'gs_pw');
        // split collection should reject group 0
        $index->getTokenIndex(0);
    }
}
143