<?php

// xref: /dokuwiki/_test/tests/Search/IndexerTest.php (revision 2cda016644e923dbda996c52bedee2113ba6d653)

namespace dokuwiki\test\Search;

use dokuwiki\Search\Indexer;
use dokuwiki\Search\Index\FileIndex;
use dokuwiki\Search\MetadataSearch;

/**
 * Tests the Indexer class
 *
 * Covers adding, deleting and renaming pages in the index, the boolean return values of
 * those operations, clearing the index, version reporting, the needsIndexing check and
 * the logger callback.
 */
class IndexerTest extends \DokuWikiTest
{
    /**
     * Test basic page indexing via addPage
     */
    public function testAddPage()
    {
        $indexer = new Indexer();

        saveWikiText('testpage', 'Foo bar baz.', 'Test initialization');
        $indexer->addPage('testpage');

        // page should be in the entity index
        $pageIndex = new FileIndex('page');
        $result = $pageIndex->search('/^testpage$/');
        $this->assertNotEmpty($result, 'testpage not found in page.idx');
    }

    /**
     * Test that the page entity is still listed in page.idx after a forced deletePage
     */
    public function testDeletePage()
    {
        $indexer = new Indexer();

        saveWikiText('delpage', 'Delete me content.', 'Test initialization');
        $indexer->addPage('delpage');
        $indexer->deletePage('delpage', true);

        // the page entity is expected to persist in page.idx after deletion; only that
        // persistence is asserted here, the clearing of the page's data is not checked
        $pageIndex = new FileIndex('page');
        $result = $pageIndex->search('/^delpage$/');
        $this->assertNotEmpty($result, 'delpage should persist in page.idx');
    }

    /**
     * Test renamePage clears old and indexes new
     */
    public function testRenamePage()
    {
        $indexer = new Indexer();

        saveWikiText('old_name', 'Old page content words.', 'Test initialization');
        $indexer->addPage('old_name');

        $indexer->renamePage('old_name', 'new_name');

        // the entity is renamed in place: new name present, old name gone
        $pageIndex = new FileIndex('page');
        $this->assertNotEmpty(
            $pageIndex->search('/^new_name$/'),
            'new_name not found in page.idx after rename'
        );
        $this->assertEmpty(
            $pageIndex->search('/^old_name$/'),
            'old_name should be gone from page.idx after rename'
        );
    }

    /**
     * renamePage must preserve the renamed page's outgoing references
     *
     * The rename only changes the page's name in the index, not its content, so all of
     * its index associations - including the pages it links to (relation_references) -
     * must survive under the new name. This is what allows a page renamed early during a
     * namespace move to still be found as a backlink source for pages moved afterwards.
     * It must work even though the destination page is not on disk yet at rename time
     * (the move operation writes it only later), so re-indexing from disk cannot be relied
     * upon here.
     *
     * @see https://github.com/dokuwiki/dokuwiki - regression after the indexer rewrite
     */
    public function testRenamePagePreservesOutgoingReferences()
    {
        $indexer = new Indexer();

        saveWikiText('refsource', '[[target:page]]', 'Test initialization');
        $indexer->addPage('refsource');

        $search = new MetadataSearch();

        // sanity: the source page references target:page
        // NOTE(review): the value is passed as a variable and re-assigned before the second
        // lookup - presumably lookupKey() takes it by reference; confirm before inlining it
        $value = 'target:page';
        $this->assertEquals(['refsource'], $search->lookupKey('relation_references', $value));

        // rename the source page WITHOUT writing the destination to disk first,
        // mimicking how the move plugin calls renamePage before saving the new page
        $indexer->renamePage('refsource', 'moved:newsource');

        // the outgoing reference must now belong to the renamed page
        $value = 'target:page';
        $this->assertEquals(
            ['moved:newsource'],
            $search->lookupKey('relation_references', $value),
            'rename lost the outgoing reference of the renamed page'
        );
    }

    /**
     * renamePage onto a name that already has its own index entry
     *
     * The renamed page must take over the destination name (keeping its own data) while the
     * destination's previous data is dropped. The stale destination row must be vacated so the
     * name resolves only to the renamed entity and does not leak as a phantom page.
     */
    public function testRenamePageOntoExistingPage()
    {
        $indexer = new Indexer();

        saveWikiText('src', '[[target:fromsrc]]', 'Test initialization');
        $indexer->addPage('src');
        saveWikiText('dst', '[[target:fromdst]]', 'Test initialization');
        $indexer->addPage('dst');

        $indexer->renamePage('src', 'dst');

        $search = new MetadataSearch();

        // dst now carries src's outgoing reference ...
        $value = 'target:fromsrc';
        $this->assertEquals(['dst'], $search->lookupKey('relation_references', $value));
        // ... and the destination's previous reference is gone
        $value = 'target:fromdst';
        $this->assertEquals([], $search->lookupKey('relation_references', $value));

        // exactly one entity named 'dst', the old name and any phantom entry are gone
        $allPages = $indexer->getAllPages();
        // array_filter keeps the original keys, so reindex before the strict comparison
        $matching = array_values(array_filter(
            $allPages,
            static fn($p) => $p === 'dst' || $p === 'src'
        ));
        $this->assertSame(['dst'], $matching);
    }

    /**
     * Test that clear removes the page.idx index file
     */
    public function testClear()
    {
        global $conf;
        $indexer = new Indexer();

        saveWikiText('clearpage', 'Some words to index.', 'Test initialization');
        $indexer->addPage('clearpage');

        // indexing must have created the file, otherwise the check below proves nothing
        $this->assertFileExists($conf['indexdir'] . '/page.idx');

        $indexer->clear();

        $this->assertFileDoesNotExist($conf['indexdir'] . '/page.idx');
    }

    /**
     * Test that getVersion returns the INDEXER_VERSION constant
     */
    public function testGetVersion()
    {
        $indexer = new Indexer();
        // with no version-modifying plugins active the raw INDEXER_VERSION is returned
        $this->assertSame(\dokuwiki\Search\INDEXER_VERSION, $indexer->getVersion());
    }

    /**
     * Test needsIndexing returns true for new pages, false once indexed and true when forced
     */
    public function testNeedsIndexing()
    {
        $indexer = new Indexer();

        saveWikiText('needsidx', 'Some content.', 'Test initialization');
        // a brand-new page has no .indexed tag yet, so it always needs indexing
        $this->assertTrue($indexer->needsIndexing('needsidx'));

        // once indexed it is up to date, even when saved and indexed in the same second
        $indexer->addPage('needsidx');
        $this->assertFalse($indexer->needsIndexing('needsidx'));
        $this->assertTrue($indexer->needsIndexing('needsidx', true)); // force
    }

    /**
     * addPage returns true when it indexed the page and false when there was nothing to do
     */
    public function testAddPageReturn()
    {
        $indexer = new Indexer();

        saveWikiText('retadd', 'Some content to index.', 'Test initialization');
        $this->assertTrue($indexer->addPage('retadd'), 'addPage should report work done');

        // already up to date: nothing to do
        $this->assertFalse(
            $indexer->addPage('retadd'),
            'addPage should report nothing to do when up to date'
        );

        // forcing reindexing always reports work done
        $this->assertTrue($indexer->addPage('retadd', true), 'forced addPage should report work done');
    }

    /**
     * deletePage returns true when it removed the page and false when there was nothing to do
     */
    public function testDeletePageReturn()
    {
        $indexer = new Indexer();

        // never indexed and not forced: nothing to do
        $this->assertFalse(
            $indexer->deletePage('retdel'),
            'deletePage should report nothing to do for an unknown page'
        );

        saveWikiText('retdel', 'Delete me content.', 'Test initialization');
        $indexer->addPage('retdel');
        $this->assertTrue($indexer->deletePage('retdel'), 'deletePage should report work done');

        // the delete removed the .indexed tag, so a second unforced call has nothing to do
        $this->assertFalse(
            $indexer->deletePage('retdel'),
            'deletePage should report nothing to do once removed'
        );
    }

    /**
     * renamePage returns true when it renamed the page and false for the no-op cases
     */
    public function testRenamePageReturn()
    {
        $indexer = new Indexer();

        // identical names: nothing to do
        $this->assertFalse(
            $indexer->renamePage('retrename', 'retrename'),
            'renamePage should report nothing to do for identical names'
        );

        // old page not in the index: nothing to do
        $this->assertFalse(
            $indexer->renamePage('retrename', 'retrenamed'),
            'renamePage should report nothing to do for an unindexed page'
        );

        saveWikiText('retrename', 'Rename me content.', 'Test initialization');
        $indexer->addPage('retrename');
        $this->assertTrue(
            $indexer->renamePage('retrename', 'retrenamed'),
            'renamePage should report work done'
        );
    }

    /**
     * Test the logger callback
     */
    public function testLogger()
    {
        $messages = [];
        // collect every logged message; captured by reference so the closure can append
        $indexer = (new Indexer())->setLogger(function ($msg) use (&$messages) {
            $messages[] = $msg;
        });

        saveWikiText('logpage', 'Log test content.', 'Test initialization');
        $indexer->addPage('logpage');

        // second call detects the page is already up to date
        $indexer->addPage('logpage');
        $this->assertNotEmpty($messages);
        $this->assertStringContainsString('up to date', end($messages));
    }
}