xref: /dokuwiki/_test/tests/Search/Collection/LookupCollectionTest.php (revision 1148921de6af6909f19cb5b30b698d0f27d7751e)
1<?php
2
3namespace dokuwiki\test\Search\Collection;
4
5use dokuwiki\Search\Collection\PageMetaCollection;
6use dokuwiki\Search\Exception\IndexIntegrityException;
7use dokuwiki\Search\Exception\IndexLockException;
8use dokuwiki\Search\Index\MemoryIndex;
9
/**
 * Tests for lookup-style (non-split) collections.
 *
 * Most tests drive a MockLookupCollection with explicitly named indexes and then
 * inspect the underlying indexes through fresh MemoryIndex instances. Two tests
 * run the same kind of checks against PageMetaCollection.
 */
class LookupCollectionTest extends \DokuWikiTest
{
    /**
     * Add data and directly check the underlying indexes for correctness
     */
    public function testAddEntity()
    {
        $index = new MockLookupCollection('a_entity', 'a_token', 'a_freq', 'a_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:icon.svg']);
        $index->unlock();

        // check entity index
        $idxEntity = new MemoryIndex('a_entity');
        $this->assertEquals('wiki:start', $idxEntity->retrieveRow(0));

        // check token index (single file, no suffix)
        $idxToken = new MemoryIndex('a_token');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));
        $this->assertEquals('wiki:icon.svg', $idxToken->retrieveRow(2));

        // check frequency index — all frequencies are 1 (written without *1)
        $idxFreq = new MemoryIndex('a_freq');
        $this->assertEquals('0', $idxFreq->retrieveRow(0)); // entity 0 with implicit freq 1
        $this->assertEquals('0', $idxFreq->retrieveRow(1));
        $this->assertEquals('0', $idxFreq->retrieveRow(2));

        // check reverse index
        $idxRev = new MemoryIndex('a_reverse');
        $this->assertEquals('0:1:2', $idxRev->retrieveRow(0));
    }

    /**
     * Duplicate tokens should be deduplicated
     */
    public function testAddEntityDedup()
    {
        $index = new MockLookupCollection('b_entity', 'b_token', 'b_freq', 'b_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        // only two distinct tokens are expected in the token index
        $idxToken = new MemoryIndex('b_token');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));

        $idxRev = new MemoryIndex('b_reverse');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));
    }

    /**
     * Updating an entity should remove old tokens and add new ones
     */
    public function testUpdateEntity()
    {
        $index = new MockLookupCollection('c_entity', 'c_token', 'c_freq', 'c_reverse');

        // initial add
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        // update: remove logo, keep banner, add icon
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:banner.jpg', 'wiki:icon.svg']);
        $index->unlock();

        // logo should be removed from frequency index
        $idxFreq = new MemoryIndex('c_freq');
        $this->assertEquals('', $idxFreq->retrieveRow(0)); // logo removed
        $this->assertEquals('0', $idxFreq->retrieveRow(1)); // banner still on entity 0
        $this->assertEquals('0', $idxFreq->retrieveRow(2)); // icon added on entity 0

        // reverse index should only have banner and icon
        $idxRev = new MemoryIndex('c_reverse');
        $this->assertEquals('1:2', $idxRev->retrieveRow(0));
    }

    /**
     * Test reverse assignments returns two-level structure with a single group key 0
     */
    public function testReverseAssignments()
    {
        $index = new MockLookupCollection('d_entity', 'd_token', 'd_freq', 'd_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        $result = $index->getReverseAssignments('wiki:start');
        $this->assertEquals([0 => [0 => 0, 1 => 0]], $result);
    }

    /**
     * Adding entity without lock should throw exception
     */
    public function testAddEntityWithoutLock()
    {
        $index = new MockLookupCollection();

        // set the expectation right before the call under test so that an
        // exception from the constructor cannot satisfy it by accident
        $this->expectException(IndexLockException::class);
        $index->addEntity('wiki:start', ['wiki:logo.png']);
    }

    /**
     * Adding empty token list should clear entity from indexes
     */
    public function testEmptyTokens()
    {
        $index = new MockLookupCollection('f_entity', 'f_token', 'f_freq', 'f_reverse');

        // add some tokens first
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png']);
        $index->unlock();

        // now clear
        $index->lock();
        $index->addEntity('wiki:start', []);
        $index->unlock();

        // frequency index should be empty for this token
        $idxFreq = new MemoryIndex('f_freq');
        $this->assertEquals('', $idxFreq->retrieveRow(0));

        // reverse index should be empty
        $idxRev = new MemoryIndex('f_reverse');
        $this->assertEquals('', $idxRev->retrieveRow(0));
    }

    /**
     * Test that PageMetaCollection('relation_media') uses correct index names
     */
    public function testMediaCollection()
    {
        $index = new PageMetaCollection('relation_media');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:logo.png', 'wiki:banner.jpg']);
        $index->unlock();

        $idxToken = new MemoryIndex('relation_media_w');
        $this->assertEquals('wiki:logo.png', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:banner.jpg', $idxToken->retrieveRow(1));

        $idxRev = new MemoryIndex('relation_media_p');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));
    }

    /**
     * Test that PageMetaCollection('relation_references') uses correct index names
     */
    public function testReferencesCollection()
    {
        $index = new PageMetaCollection('relation_references');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->unlock();

        $idxToken = new MemoryIndex('relation_references_w');
        $this->assertEquals('wiki:syntax', $idxToken->retrieveRow(0));
        $this->assertEquals('wiki:welcome', $idxToken->retrieveRow(1));

        $idxRev = new MemoryIndex('relation_references_p');
        $this->assertEquals('0:1', $idxRev->retrieveRow(0));

        $result = $index->getReverseAssignments('wiki:start');
        $this->assertEquals([0 => [0 => 0, 1 => 0]], $result);
    }

    /**
     * resolveTokens should deduplicate and assign frequency 1 under group 0
     */
    public function testResolveTokens()
    {
        $index = new MockLookupCollection('rt_entity', 'rt_token', 'rt_freq', 'rt_reverse');
        $index->lock();
        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [
                ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:logo.png'],
            ]);
        } finally {
            // always release the lock, even when the call above throws
            $index->unlock();
        }

        // all tokens under group 0 (non-split)
        $this->assertArrayHasKey(0, $result);
        $this->assertCount(2, $result[0]); // deduplicated

        // token IDs are sequential: logo=0, banner=1
        $this->assertEquals(1, $result[0][0]); // logo freq=1
        $this->assertEquals(1, $result[0][1]); // banner freq=1
    }

    /**
     * resolveTokens with empty input should return empty array
     */
    public function testResolveTokensEmpty()
    {
        $index = new MockLookupCollection('rte_entity', 'rte_token', 'rte_freq', 'rte_reverse');
        $index->lock();
        try {
            $result = $this->callInaccessibleMethod($index, 'resolveTokens', [[]]);
        } finally {
            // always release the lock, even when the call above throws
            $index->unlock();
        }

        $this->assertEmpty($result);
    }

    /**
     * countTokens should deduplicate and assign frequency 1
     */
    public function testCountTokens()
    {
        $index = new MockLookupCollection();

        $result = $this->callInaccessibleMethod($index, 'countTokens', [
            ['wiki:logo.png', 'wiki:banner.jpg', 'wiki:logo.png'],
        ]);

        $this->assertEquals([
            'wiki:logo.png' => 1,
            'wiki:banner.jpg' => 1,
        ], $result);
    }

    /**
     * getEntitiesWithData returns entities that have frequency data
     */
    public function testGetEntitiesWithData()
    {
        $index = new MockLookupCollection('ewd_entity', 'ewd_token', 'ewd_freq', 'ewd_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->addEntity('wiki:other', ['wiki:syntax']);
        $index->addEntity('wiki:empty', []);
        $index->unlock();

        // sort so the comparison does not depend on the order of the returned list
        $result = $index->getEntitiesWithData();
        sort($result);
        $this->assertEquals(['wiki:other', 'wiki:start'], $result);
    }

    /**
     * resolveTokenFrequencies returns entity frequencies for given token IDs
     */
    public function testResolveTokenFrequencies()
    {
        $index = new MockLookupCollection('rtf_entity', 'rtf_token', 'rtf_freq', 'rtf_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax', 'wiki:welcome']);
        $index->addEntity('wiki:other', ['wiki:syntax']);
        $index->unlock();

        // token ID 0 = wiki:syntax, referenced by both entities
        $result = $index->resolveTokenFrequencies(0, [0]);
        $this->assertArrayHasKey(0, $result);
        $this->assertCount(2, $result[0]); // two entities have this token
    }

    /**
     * checkIntegrity passes on a healthy non-split collection
     */
    public function testCheckIntegrityHealthy()
    {
        $index = new MockLookupCollection('cih_entity', 'cih_token', 'cih_freq', 'cih_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax']);
        $index->unlock();

        $index->checkIntegrity(); // should not throw

        // reaching this line is the actual check; count it as an assertion
        $this->assertTrue(true);
    }

    /**
     * checkIntegrity passes on an empty non-split collection
     */
    public function testCheckIntegrityEmpty()
    {
        $index = new MockLookupCollection('cie_entity', 'cie_token', 'cie_freq', 'cie_reverse');
        $index->checkIntegrity(); // should not throw

        // reaching this line is the actual check; count it as an assertion
        $this->assertTrue(true);
    }

    /**
     * checkIntegrity detects token/frequency mismatch on non-split collection
     */
    public function testCheckIntegrityTokenFreqMismatch()
    {
        global $conf;
        $index = new MockLookupCollection('cim_entity', 'cim_token', 'cim_freq', 'cim_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax']);
        $index->unlock();

        // corrupt: add extra line to token index
        $written = file_put_contents($conf['indexdir'] . '/cim_token.idx', "extra\n", FILE_APPEND);
        $this->assertNotFalse($written, 'could not append to the token index file');

        // the check runs on a newly constructed collection object
        $this->expectException(IndexIntegrityException::class);
        (new MockLookupCollection('cim_entity', 'cim_token', 'cim_freq', 'cim_reverse'))->checkIntegrity();
    }

    /**
     * checkIntegrity detects entity/reverse mismatch on non-split collection
     */
    public function testCheckIntegrityEntityReverseMismatch()
    {
        global $conf;
        $index = new MockLookupCollection('cir_entity', 'cir_token', 'cir_freq', 'cir_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax']);
        $index->unlock();

        // corrupt: add extra line to reverse index
        $written = file_put_contents($conf['indexdir'] . '/cir_reverse.idx', "0\n", FILE_APPEND);
        $this->assertNotFalse($written, 'could not append to the reverse index file');

        // the check runs on a newly constructed collection object
        $this->expectException(IndexIntegrityException::class);
        (new MockLookupCollection('cir_entity', 'cir_token', 'cir_freq', 'cir_reverse'))->checkIntegrity();
    }

    /**
     * checkIntegrity detects missing frequency index when token index exists
     */
    public function testCheckIntegrityMissingFreqIndex()
    {
        global $conf;
        $index = new MockLookupCollection('cimf_entity', 'cimf_token', 'cimf_freq', 'cimf_reverse');
        $index->lock();
        $index->addEntity('wiki:start', ['wiki:syntax']);
        $index->unlock();

        // corrupt: delete frequency index (no error suppression, verify the result instead)
        $freqFile = $conf['indexdir'] . '/cimf_freq.idx';
        if (file_exists($freqFile)) {
            unlink($freqFile);
        }
        $this->assertFalse(file_exists($freqFile), 'frequency index file should be gone');

        // the check runs on a newly constructed collection object
        $this->expectException(IndexIntegrityException::class);
        (new MockLookupCollection('cimf_entity', 'cimf_token', 'cimf_freq', 'cimf_reverse'))->checkIntegrity();
    }

    /**
     * groupToSuffix throws on non-0 group for non-split collection
     */
    public function testGroupToSuffixValidation()
    {
        $index = new MockLookupCollection('gs_entity', 'gs_token', 'gs_freq', 'gs_reverse');

        // set the expectation right before the call under test so that an
        // exception from the constructor cannot satisfy it by accident
        $this->expectException(\dokuwiki\Search\Exception\IndexUsageException::class);

        // non-split collection should reject group 5
        $index->getTokenIndex(5);
    }
}
354