xref: /dokuwiki/inc/Search/Indexer.php (revision 2cda016644e923dbda996c52bedee2113ba6d653)
16225b270SMichael Große<?php
26225b270SMichael Große
36225b270SMichael Großenamespace dokuwiki\Search;
46225b270SMichael Große
5e1272c08SAndreas Gohruse dokuwiki\Debug\DebugHelper;
66225b270SMichael Großeuse dokuwiki\Extension\Event;
783b3acccSAndreas Gohruse dokuwiki\Search\Collection\PageFulltextCollection;
883b3acccSAndreas Gohruse dokuwiki\Search\Collection\PageMetaCollection;
983b3acccSAndreas Gohruse dokuwiki\Search\Collection\PageTitleCollection;
1015f699acSAndreas Gohruse dokuwiki\Search\Exception\IndexAccessException;
1121fbd01bSAndreas Gohruse dokuwiki\Search\Exception\IndexIntegrityException;
12a16bd548SSatoshi Saharause dokuwiki\Search\Exception\IndexLockException;
13a16bd548SSatoshi Saharause dokuwiki\Search\Exception\IndexWriteException;
1483b3acccSAndreas Gohruse dokuwiki\Search\Index\FileIndex;
1583b3acccSAndreas Gohruse dokuwiki\Search\Index\Lock;
16e1272c08SAndreas Gohruse dokuwiki\Search\Index\MemoryIndex;
174027a91aSSatoshi Sahara
// Version tag of the core index format. It is the base of the string returned by
// Indexer::getVersion(), which is stored in each page's ".indexed" tag file; bumping
// this number makes every existing tag mismatch and thus forces a rebuild on upgrade.
const INDEXER_VERSION = 9;
206225b270SMichael Große
/**
 * Class DokuWiki Indexer
 *
 * Manages the page search index by delegating to Collection classes:
 * page titles, fulltext words and arbitrary metadata keys are each stored in
 * their own collection, all of them sharing the same 'page' index.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer
{
    /** @var callable|null Logging callback, receives a string message */
    protected $logger;

    /**
     * Set a logging callback
     *
     * The callback receives a single string message. Use this to integrate
     * with different output mechanisms (TaskRunner echo, CLI output, Logger, etc.)
     *
     * @param callable $logger
     * @return static the indexer itself, to allow call chaining
     */
    public function setLogger(callable $logger): static
    {
        $this->logger = $logger;
        return $this;
    }

    /**
     * Send a message to the registered logger
     *
     * Does nothing when no logger has been registered.
     *
     * @param string $message
     */
    protected function log(string $message): void
    {
        if ($this->logger) {
            ($this->logger)($message);
        }
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once and then cached in a static variable.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @return int|string the bare INDEXER_VERSION when no plugin contributed,
     *                    otherwise "<version>+<plugin>=<version>..." sorted by plugin name
     */
    public function getVersion(): int|string
    {
        static $indexer_version = null;
        // strict check: a loose comparison would treat a version of 0 as "not cached yet"
        if ($indexer_version !== null) {
            return $indexer_version;
        }

        $version = INDEXER_VERSION;

        $data = ['dokuwiki' => $version];
        Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);

        // the core version is already the first part of the string, so it is taken
        // out before the plugin entries are sorted and appended
        unset($data['dokuwiki']);
        ksort($data);
        foreach ($data as $plugin => $vers) {
            $version .= '+' . $plugin . '=' . $vers;
        }

        $indexer_version = $version;
        return $indexer_version;
    }

    /**
     * Return a list of all indexed pages
     *
     * Empty rows of the page index are always skipped.
     *
     * @param bool $existsFilter only return pages that exist on disk
     * @return string[] list of page names (keys are the RIDs in the page index)
     */
    public function getAllPages(bool $existsFilter = false): array
    {
        $pageIndex = new MemoryIndex('page');
        return array_filter(
            iterator_to_array($pageIndex),
            static fn($v) => $v !== '' && (!$existsFilter || page_exists($v, '', false))
        );
    }

    /**
     * Check if a page needs (re-)indexing
     *
     * A page needs indexing when indexing is forced, when it has no '.indexed' tag
     * file, when the tag was written by a different indexer version, or when the
     * page file is newer than the tag.
     *
     * @param string $page
     * @param bool $force
     * @return bool true if indexing is needed
     */
    public function needsIndexing(string $page, bool $force = false): bool
    {
        $idxtag = metaFN($page, '.indexed');
        if ($force || !file_exists($idxtag)) {
            return true;
        }

        // compare as strings: the tag file holds the version as text and a loose
        // comparison would consider different numeric-looking versions equal
        if (trim((string)io_readFile($idxtag)) !== (string)$this->getVersion()) {
            return true;
        }

        // the index tag is written when the page is indexed; the page only needs
        // (re-)indexing if it was changed *after* that - an equal mtime means it was
        // saved and indexed within the same second and is therefore up to date
        $last = @filemtime($idxtag);
        return $last < @filemtime(wikiFN($page));
    }

    /**
     * Add/update the search index for a page
     *
     * Locking is handled internally.
     *
     * @triggers INDEXER_PAGE_ADD
     * The event data holds 'page', 'body', 'metadata' and 'pid'. Unless the default
     * action is prevented, the raw wiki text of the page is appended to 'body'.
     *
     * @param string $page The page to index
     * @param bool $force force reindexing even when the index is up to date
     *
     * @return bool true if the page was indexed, false if there was nothing to do
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function addPage(string $page, bool $force = false): bool
    {
        if (!$this->needsIndexing($page, $force)) {
            $this->log("Indexer: index for $page up to date");
            return false;
        }

        // create shared writable page index early so we can resolve the PID for plugins
        $pageIndex = new FileIndex('page', '', true);

        // prepare event data
        $data = [
            'page' => $page,
            'body' => '',
            'metadata' => [
                'title' => p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED),
                'relation_references' => array_keys(
                    p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED) ?? []
                ),
                'relation_media' => array_keys(
                    p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED) ?? []
                ),
                'internal_index' => p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED) !== false,
            ],
            'pid' => $pageIndex->accessCachedValue($page),
        ];

        // let plugins modify the data
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) {
            $data['body'] = $data['body'] . ' ' . rawWiki($data['page']);
        }
        $event->advise_after();
        unset($event);

        // index title
        $this->storeEntity(new PageTitleCollection($pageIndex), $data['page'], [$data['metadata']['title']]);
        unset($data['metadata']['title']);

        // index fulltext; an empty word list clears any previously stored fulltext data
        if ($data['metadata']['internal_index']) {
            $words = Tokenizer::getWords($data['body']);
        } else {
            $this->log("Indexer: full text indexing disabled for {$data['page']}");
            $words = [];
        }
        $this->storeEntity(new PageFulltextCollection($pageIndex), $data['page'], $words);
        unset($data['metadata']['internal_index']);

        // index the remaining metadata keys, one collection per key
        foreach ($data['metadata'] as $key => $values) {
            if (!is_array($values)) {
                // scalars become a single-value list, null and '' mean "no values"
                $values = ($values !== null && $values !== '') ? [$values] : [];
            }
            $this->storeEntity(new PageMetaCollection($key, $pageIndex), $data['page'], $values);
        }

        // update metadata registry
        $this->updateMetadataRegistry(array_keys($data['metadata']));

        // update index tag file
        io_saveFile(metaFN($data['page'], '.indexed'), $this->getVersion());
        $this->log("Indexer: finished indexing {$data['page']}");
        return true;
    }

    /**
     * Remove a page from the index
     *
     * Clears the page's data from all collections. The entity persists in page.idx.
     *
     * @param string $page The page to remove
     * @param bool $force force deletion even when no .indexed tag exists
     *
     * @return bool true if the page was removed, false if there was nothing to do
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function deletePage(string $page, bool $force = false): bool
    {
        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            $this->log("Indexer: $page.indexed file does not exist, ignoring");
            return false;
        }

        $pageIndex = new FileIndex('page', '', true);

        // storing an empty value list is how a page's data is cleared from a collection
        $this->storeEntity(new PageTitleCollection($pageIndex), $page, []);
        $this->storeEntity(new PageFulltextCollection($pageIndex), $page, []);

        foreach ($this->getMetadataRegistryKeys() as $key) {
            $this->storeEntity(new PageMetaCollection($key, $pageIndex), $page, []);
        }

        $this->log("Indexer: deleted $page from index");
        @unlink($idxtag);
        return true;
    }

    /**
     * Rename a page in the search index
     *
     * This renames the page's entity entry in place: its entity ID (the row in the
     * page index) is kept and only its name is changed. Because every collection
     * (title, fulltext and all metadata keys such as relation_references) is keyed by
     * that entity ID, all token, frequency and reverse associations are preserved and
     * transparently belong to the new name afterwards.
     *
     * In particular this keeps the renamed page's *outgoing* references intact. That is
     * essential during multi-step operations such as namespace moves: a page renamed
     * early on must still be discoverable as a backlink source for pages that are moved
     * later. Re-indexing from disk instead would lose this, because the destination page
     * has usually not been written to disk yet when this method is called.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     *
     * @return bool true if the page was renamed, false if there was nothing to do
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage(string $oldpage, string $newpage): bool
    {
        if ($oldpage === $newpage) return false;

        $pageIndex = new FileIndex('page', '', true);
        $oldId = null;
        $newId = null;

        // everything between creating the writable index and unlock() may throw;
        // the finally block makes sure the index is unlocked in that case, too
        try {
            // locate the existing entity rows; stop as soon as both are known
            foreach ($pageIndex as $rid => $value) {
                if ($value === $oldpage) $oldId = $rid;
                if ($value === $newpage) $newId = $rid;
                if ($oldId !== null && $newId !== null) break;
            }

            if ($oldId !== null) {
                // If the new name already has its own entity, drop its indexed data first.
                // deletePage() intentionally keeps the entity row in page.idx, so we additionally
                // blank that row - an empty entry is the index's "removed" marker (see getAllPages()).
                // Otherwise two rows would carry the new name and a lookup could resolve to the
                // now-empty one instead of the renamed entity that holds the data.
                if ($newId !== null) {
                    $this->deletePage($newpage, true);
                    $pageIndex->changeRow($newId, '');
                }

                // rename in place — keeps the entity ID and thus all index associations
                $pageIndex->changeRow($oldId, $newpage);
            }
        } finally {
            $pageIndex->unlock();
        }

        // nothing to rename if the old page was never indexed
        if ($oldId === null) {
            $this->log("Indexer: $oldpage is not in the index, nothing to rename");
            return false;
        }

        $this->log("Indexer: renamed $oldpage to $newpage in index");
        return true;
    }

    /**
     * Clear all page indexes
     *
     * Deletes the index files of all registered metadata keys, the fulltext index
     * files, and the title, page and metadata registry files while holding the
     * 'page' lock. Files that cannot be removed are silently skipped.
     */
    public function clear(): void
    {
        global $conf;
        $dir = $conf['indexdir'];

        Lock::acquire('page');
        try {
            // clear metadata indexes; the registry has to be read before it is deleted below
            foreach ($this->getMetadataRegistryKeys() as $key) {
                $clean = PageMetaCollection::cleanName($key);
                foreach (['_w.idx', '_i.idx', '_p.idx'] as $suffix) {
                    @unlink($dir . '/' . $clean . $suffix);
                }
            }

            // clear fulltext indexes
            foreach (['/i*.idx', '/w*.idx'] as $pattern) {
                $files = glob($dir . $pattern);
                if ($files) {
                    foreach ($files as $f) @unlink($f);
                }
            }
            @unlink($dir . '/pageword.idx');
            @unlink($dir . '/lengths.idx');

            // clear title and page indexes
            @unlink($dir . '/title.idx');
            @unlink($dir . '/page.idx');
            @unlink($dir . '/metadata.idx');
        } finally {
            // release the lock even if one of the steps above fails unexpectedly
            Lock::release('page');
        }
    }

    /**
     * Check the structural integrity of all search indexes
     *
     * Checks the fulltext and title collections and the collection of every
     * registered metadata key.
     *
     * @throws IndexIntegrityException when a structural inconsistency is found
     */
    public function checkIntegrity(): void
    {
        (new PageFulltextCollection())->checkIntegrity();
        (new PageTitleCollection())->checkIntegrity();

        foreach ($this->getMetadataRegistryKeys() as $key) {
            (new PageMetaCollection($key))->checkIntegrity();
        }
    }

    /**
     * Whether the search index is empty (no fulltext data indexed yet)
     *
     * @return bool
     */
    public function isIndexEmpty(): bool
    {
        return (new PageFulltextCollection())->getTokenIndexMaximum() === 0;
    }

    /**
     * Get the list of known metadata keys from the metadata registry
     *
     * The registry is the file metadata.idx in the index directory, one key per line.
     *
     * @return string[] list of metadata key names, empty if there is no registry yet
     */
    protected function getMetadataRegistryKeys(): array
    {
        global $conf;
        $fn = $conf['indexdir'] . '/metadata.idx';
        if (!file_exists($fn)) return [];
        $keys = file($fn, FILE_IGNORE_NEW_LINES);
        return $keys ?: [];
    }

    /**
     * Update the metadata registry with new keys
     *
     * Keys that are not registered yet are appended; the registry file is only
     * rewritten when at least one key was added.
     *
     * @param string[] $keys metadata key names to ensure are registered
     *
     * @internal Only marked public for access via LegacyIndexer
     */
    public function updateMetadataRegistry(array $keys): void
    {
        global $conf;
        $fn = $conf['indexdir'] . '/metadata.idx';
        $existing = $this->getMetadataRegistryKeys();

        $added = false;
        foreach ($keys as $key) {
            // Registry entries are read back as strings, so compare as strings and
            // strictly. A loose in_array() treats numeric-looking names such as
            // '1e3' and '1000' as equal and would silently skip registering a new key.
            $key = (string)$key;
            if (!in_array($key, $existing, true)) {
                $existing[] = $key;
                $added = true;
            }
        }

        if ($added) {
            io_saveFile($fn, implode("\n", $existing) . "\n");
        }
    }

    /**
     * Return a list of all indexed pages, optionally filtered by metadata key
     *
     * Kept on Indexer (not just LegacyIndexer) because several plugins call it
     * directly on `new Indexer()` instances rather than going through
     * idx_get_indexer().
     *
     * @param string|null $key metadata key name
     * @return string[]
     *
     * @deprecated 2026-04-07 use MetadataSearch::getPages() or Indexer::getAllPages() instead
     */
    public function getPages($key = null)
    {
        DebugHelper::dbgDeprecatedFunction(MetadataSearch::class . '::getPages()');
        return (new MetadataSearch())->getPages($key);
    }

    /**
     * Store the values of one page in a collection while holding the collection's lock
     *
     * Performs the same lock / addEntity / unlock sequence as a fluent call chain,
     * but releases the lock in a finally block so that a failing write does not
     * leave the collection locked.
     *
     * NOTE(review): relies on lock() and addEntity() returning the collection itself,
     * as the fluent chains in the previous implementation imply - confirm against
     * the Collection classes.
     *
     * @param PageTitleCollection|PageFulltextCollection|PageMetaCollection $collection
     * @param string $page the page the values belong to
     * @param array $values the values to store, an empty list clears the page's data
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    private function storeEntity(
        PageTitleCollection|PageFulltextCollection|PageMetaCollection $collection,
        string $page,
        array $values
    ): void {
        $locked = $collection->lock();
        try {
            $locked->addEntity($page, $values);
        } finally {
            $locked->unlock();
        }
    }
}
422