xref: /dokuwiki/_test/tests/Search/Collection/LookupCollectionTest.php (revision 6734bb8cef71e8b4af23e627d4db5430304d55a2)
1ede46466SAndreas Gohr<?php
2ede46466SAndreas Gohr
3ede46466SAndreas Gohrnamespace dokuwiki\test\Search\Collection;
4ede46466SAndreas Gohr
use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexUsageException;
use dokuwiki\Search\Index\MemoryIndex;
8ede46466SAndreas Gohr
/**
 * Tests for lookup collections, exercised through MockLookupCollection and PageMetaCollection
 *
 * Most tests write data through a collection that uses test-specific index names and then
 * inspect the resulting rows directly via MemoryIndex instances opened on the same names.
 * Every lock acquired by a test is released again before the test ends.
 */
class LookupCollectionTest extends \DokuWikiTest
{
    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testAddEntity()
    {
        $index = new MockLookupCollection('a_entity', 'a_token', 'a_freq', 'a_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:icon.svg']);
        $index->unlock();

        // check entity index
        $idxEntity = new MemoryIndex('a_entity');
        $this->assertEquals('wiki:start', $idxEntity->retrieveRow(0));

        // check token index (single file, no suffix)
        $idxToken = new MemoryIndex('a_token');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));
        $this->assertEquals('wiki:icon.svg', $idxToken->retrieveRow(2));

        // check frequency index: all frequencies are 1 (written without *1)
        $idxFreq = new MemoryIndex('a_freq');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // entity 0 with implicit freq 1
        $this->assertEquals('0', $idxFreq->retrieveRow(1));
        $this->assertEquals('0', $idxFreq->retrieveRow(2));

        // check reverse index
        $idxRev = new MemoryIndex('a_reverse');
        $this->assertEquals('0:1:2', $idxRev->retrieveRow(0));
    }

    /**
     * Duplicate tokens should be deduplicated
     */
    public function testAddEntityDedup()
    {
        $index = new MockLookupCollection('b_entity', 'b_token', 'b_freq', 'b_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        // the repeated logo token occupies a single row
        $idxToken = new MemoryIndex('b_token');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));

        // and is referenced only once from the entity
        $idxRev = new MemoryIndex('b_reverse');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));
    }

    /**
     * Updating an entity should remove old tokens and add new ones
     */
    public function testUpdateEntity()
    {
        $index = new MockLookupCollection('c_entity', 'c_token', 'c_freq', 'c_reverse');

        // initial add
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        // update: remove logo, keep banner, add icon
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:banner.jpg', 'wiki:icon.svg']);
        $index->unlock();

        // logo should be removed from frequency index
        $idxFreq = new MemoryIndex('c_freq');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // logo removed
        $this->assertEquals('0', $idxFreq->retrieveRow(1)); // banner still on entity 0
        $this->assertEquals('0', $idxFreq->retrieveRow(2)); // icon added on entity 0

        // reverse index should only have banner and icon
        $idxRev = new MemoryIndex('c_reverse');
        $this->assertEquals('1:2', $idxRev->retrieveRow(0));
    }

    /**
     * Reverse assignments are returned as a two-level structure keyed by group 0
     */
    public function testReverseAssignments()
    {
        $index = new MockLookupCollection('d_entity', 'd_token', 'd_freq', 'd_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        $result = $index->getReverseAssignments('wiki:start');
        $this->assertEquals([0 => [0 => 0, 1 => 0]], $result);
    }

    /**
     * Adding entity without lock should throw exception
     */
    public function testAddEntityWithoutLock()
    {
        // must be registered before the call that is expected to throw
        $this->expectException(IndexLockException::class);

        $index = new MockLookupCollection();
        $index->addEntity('wiki:start', ['wiki:logo.png']);
    }

    /**
     * Adding empty token list should clear entity from indexes
     */
    public function testEmptyTokens()
    {
        $index = new MockLookupCollection('f_entity', 'f_token', 'f_freq', 'f_reverse');

        // add some tokens first
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png']);
        $index->unlock();

        // now clear
        $index->lock();
        $index->addEntity('wiki:start', []);
        $index->unlock();

        // frequency index should be empty for this token
        $idxFreq = new MemoryIndex('f_freq');
        $this->assertEquals('', $idxFreq->retrieveRow(0));

        // reverse index should be empty
        $idxRev = new MemoryIndex('f_reverse');
        $this->assertEquals('', $idxRev->retrieveRow(0));
    }

    /**
     * Test that PageMetaCollection('relation_media') uses correct index names
     */
    public function testMediaCollection()
    {
        $index = new PageMetaCollection('relation_media');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        // tokens are expected in the "_w" index
        $idxToken = new MemoryIndex('relation_media_w');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));

        // reverse assignments are expected in the "_p" index
        $idxRev = new MemoryIndex('relation_media_p');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));
    }

    /**
     * Test that PageMetaCollection('relation_references') uses correct index names
     */
    public function testReferencesCollection()
    {
        $index = new PageMetaCollection('relation_references');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->unlock();

        // tokens are expected in the "_w" index
        $idxToken = new MemoryIndex('relation_references_w');
        $this->assertEquals('wiki:syntax', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:welcome', $idxToken->retrieveRow(1));

        // reverse assignments are expected in the "_p" index
        $idxRev = new MemoryIndex('relation_references_p');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));

        $result = $index->getReverseAssignments('wiki:start');
        $this->assertEquals([0 => [0 => 0, 1 => 0]], $result);
    }

    /**
     * resolveTokens should deduplicate and assign frequency 1 under group 0
     */
    public function testResolveTokens()
    {
        $index = new MockLookupCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
                ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:logo.png'],
            ]);
        } finally {
            // release the lock even when the call throws, like every other test here does
            $index->unlock();
        }

        // all tokens under group 0 (non-split)
        $this->assertArrayHasKey(0, $result);
        $this->assertCount(2, $result[0]); // deduplicated

        // token IDs are sequential: logo=0, banner=1
        $this->assertEquals(1, $result[0][0]); // logo freq=1
        $this->assertEquals(1, $result[0][1]); // banner freq=1
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockLookupCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();

        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);
        } finally {
            // release the lock even when the call throws, like every other test here does
            $index->unlock();
        }

        $this->assertEmpty($result);
    }

    /**
     * countTokens should deduplicate and assign frequency 1
     */
    public function testCountTokens()
    {
        $index = new MockLookupCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:logo.png'],
        ]);

        $this->assertEquals([
            'wiki:logo.png' => 1,
            'wiki:banner.jpg' => 1,
        ], $result);
    }

    /**
     * getEntitiesWithData returns entities that have frequency data
     */
    public function testGetEntitiesWithData()
    {
        $index = new MockLookupCollection('ewd_entity', 'ewd_token', 'ewd_freq', 'ewd_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->addEntity('wiki:other', ['wiki:syntax']);
        $index->addEntity('wiki:empty', []);
        $index->unlock();

        $result = $index->getEntitiesWithData();
        // the order of the returned entities is not part of the assertion
        sort($result);
        $this->assertEquals(['wiki:other', 'wiki:start'], $result);
    }

    /**
     * resolveTokenFrequencies returns entity frequencies for given token IDs
     */
    public function testResolveTokenFrequencies()
    {
        $index = new MockLookupCollection('rtf_entity', 'rtf_token', 'rtf_freq', 'rtf_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->addEntity('wiki:other', ['wiki:syntax']);
        $index->unlock();

        // token ID 0 = wiki:syntax, referenced by both entities
        $result = $index->resolveTokenFrequencies(0, [0]);
        $this->assertArrayHasKey(0, $result);
        $this->assertCount(2, $result[0]); // two entities have this token
    }

    /**
     * groupToSuffix throws on non-0 group for non-split collection
     */
    public function testGroupToSuffixValidation()
    {
        // must be registered before the call that is expected to throw
        $this->expectException(IndexUsageException::class);

        $index = new MockLookupCollection('gs_entity', 'gs_token', 'gs_freq', 'gs_reverse');
        // non-split collection should reject group 5
        $index->getTokenIndex(5);
    }
}
275