xref: /dokuwiki/inc/Search/Indexer.php (revision 5d034a75ec636eaf8dd957fac678f8d04e5b23fc)
16225b270SMichael Große<?php
26225b270SMichael Große
36225b270SMichael Großenamespace dokuwiki\Search;
46225b270SMichael Große
5e1272c08SAndreas Gohruse dokuwiki\Debug\DebugHelper;
66225b270SMichael Großeuse dokuwiki\Extension\Event;
7e1272c08SAndreas Gohruse dokuwiki\Search\Collection\CollectionSearch;
883b3acccSAndreas Gohruse dokuwiki\Search\Collection\PageFulltextCollection;
983b3acccSAndreas Gohruse dokuwiki\Search\Collection\PageMetaCollection;
1083b3acccSAndreas Gohruse dokuwiki\Search\Collection\PageTitleCollection;
1115f699acSAndreas Gohruse dokuwiki\Search\Exception\IndexAccessException;
1221fbd01bSAndreas Gohruse dokuwiki\Search\Exception\IndexIntegrityException;
13a16bd548SSatoshi Saharause dokuwiki\Search\Exception\IndexLockException;
14a16bd548SSatoshi Saharause dokuwiki\Search\Exception\IndexWriteException;
15e1272c08SAndreas Gohruse dokuwiki\Search\Exception\SearchException;
1683b3acccSAndreas Gohruse dokuwiki\Search\Index\FileIndex;
1783b3acccSAndreas Gohruse dokuwiki\Search\Index\Lock;
18e1272c08SAndreas Gohruse dokuwiki\Search\Index\MemoryIndex;
19e1272c08SAndreas Gohruse dokuwiki\Search\Index\TupleOps;
204027a91aSSatoshi Sahara
214027a91aSSatoshi Sahara// Version tag used to force rebuild on upgrade
22*5d034a75SAndreas Gohrconst INDEXER_VERSION = 9;
236225b270SMichael Große
/**
 * Class DokuWiki Indexer
 *
 * Manages the page search index by delegating to Collection classes.
 *
 * Every write to a collection follows the same pattern: lock the collection,
 * replace the tokens stored for the page, unlock again. The unlock always
 * happens in a finally block so a failing write cannot leave a lock behind.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer
{
    /** @var callable|null Logging callback, receives a string message */
    protected $logger;

    /**
     * Set a logging callback
     *
     * The callback receives a single string message. Use this to integrate
     * with different output mechanisms (TaskRunner echo, CLI output, Logger, etc.)
     *
     * @param callable $logger
     * @return static the indexer itself, to allow call chaining
     */
    public function setLogger(callable $logger): static
    {
        $this->logger = $logger;
        return $this;
    }

    /**
     * Send a message to the registered logger
     *
     * Does nothing when no logger has been registered.
     *
     * @param string $message
     */
    protected function log(string $message): void
    {
        if ($this->logger) {
            ($this->logger)($message);
        }
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and cached in a static variable.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @return int|string the bare INDEXER_VERSION when no plugin contributed,
     *                    otherwise "<version>+<plugin>=<version>..." sorted by plugin name
     */
    public function getVersion(): int|string
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            $data = ['dokuwiki' => $version];
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
            unset($data['dokuwiki']); // this needs to be first
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+' . $plugin . '=' . $vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Return a list of all indexed pages
     *
     * Empty rows of the page index are always skipped.
     *
     * @param bool $existsFilter only return pages that exist on disk
     * @return string[] list of page names (keys are the RIDs in the page index)
     */
    public function getAllPages(bool $existsFilter = false): array
    {
        $pageIndex = new MemoryIndex('page');
        return array_filter(
            iterator_to_array($pageIndex),
            static fn($v) => $v !== '' && (!$existsFilter || page_exists($v, '', false))
        );
    }

    /**
     * Check if a page needs (re-)indexing
     *
     * A page needs indexing when indexing is forced, when it has no .indexed tag,
     * when the tag was written by a different indexer version, or when the tag is
     * not newer than the page file.
     *
     * @param string $page
     * @param bool $force
     * @return bool true if indexing is needed
     */
    public function needsIndexing(string $page, bool $force = false): bool
    {
        $idxtag = metaFN($page, '.indexed');
        if ($force || !file_exists($idxtag)) return true;

        // compare as strings: a loose comparison would treat e.g. "9.0" and 9 as the same version
        if (trim((string)io_readFile($idxtag)) !== (string)$this->getVersion()) return true;

        $last = @filemtime($idxtag);
        return $last <= @filemtime(wikiFN($page));
    }

    /**
     * Add/update the search index for a page
     *
     * Locking is handled internally.
     *
     * @triggers INDEXER_PAGE_ADD
     * Plugins may modify the page, body and metadata that get indexed. When the
     * default action is not prevented, the raw wiki text is appended to the body.
     *
     * @param string $page The page to index
     * @param bool $force force reindexing even when the index is up to date
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function addPage(string $page, bool $force = false): void
    {
        if (!$this->needsIndexing($page, $force)) {
            $this->log("Indexer: index for $page up to date");
            return;
        }

        // create shared writable page index early so we can resolve the PID for plugins
        $pageIndex = new FileIndex('page', '', true);

        // prepare event data
        $data = [
            'page' => $page,
            'body' => '',
            'metadata' => [
                'title' => p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED),
                'relation_references' => array_keys(
                    p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED) ?? []
                ),
                'relation_media' => array_keys(
                    p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED) ?? []
                ),
                'internal_index' => p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED) !== false,
            ],
            'pid' => $pageIndex->accessCachedValue($page),
        ];

        // let plugins modify the data
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) {
            $data['body'] = $data['body'] . ' ' . rawWiki($data['page']);
        }
        $event->advise_after();
        unset($event);

        // index title; it is removed afterwards so it is not treated as a generic metadata key
        $this->storeEntity(new PageTitleCollection($pageIndex), $data['page'], [$data['metadata']['title']]);
        unset($data['metadata']['title']);

        // index fulltext, or clear any previously stored fulltext data when indexing is disabled
        if ($data['metadata']['internal_index']) {
            $words = Tokenizer::getWords($data['body']);
        } else {
            $this->log("Indexer: full text indexing disabled for {$data['page']}");
            $words = [];
        }
        $this->storeEntity(new PageFulltextCollection($pageIndex), $data['page'], $words);
        unset($data['metadata']['internal_index']);

        // index metadata keys
        foreach ($data['metadata'] as $key => $values) {
            if (!is_array($values)) {
                // scalar values become a one element list, null and '' mean "no value"
                $values = ($values !== null && $values !== '') ? [$values] : [];
            }
            $this->storeEntity(new PageMetaCollection($key, $pageIndex), $data['page'], $values);
        }

        // update metadata registry
        $this->updateMetadataRegistry(array_keys($data['metadata']));

        // update index tag file
        io_saveFile(metaFN($data['page'], '.indexed'), $this->getVersion());
        $this->log("Indexer: finished indexing {$data['page']}");
    }

    /**
     * Remove a page from the index
     *
     * Clears the page's data from all collections. The entity persists in page.idx.
     *
     * @param string $page The page to remove
     * @param bool $force force deletion even when no .indexed tag exists
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function deletePage(string $page, bool $force = false): void
    {
        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            $this->log("Indexer: $page.indexed file does not exist, ignoring");
            return;
        }

        $pageIndex = new FileIndex('page', '', true);

        // storing an empty token list removes whatever was indexed for the page
        $this->storeEntity(new PageTitleCollection($pageIndex), $page, []);
        $this->storeEntity(new PageFulltextCollection($pageIndex), $page, []);

        foreach ($this->getMetadataRegistryKeys() as $key) {
            $this->storeEntity(new PageMetaCollection($key, $pageIndex), $page, []);
        }

        $this->log("Indexer: deleted $page from index");
        @unlink($idxtag);
    }

    /**
     * Rename a page in the search index
     *
     * The page must already have been moved on disk before calling this.
     * Clears the old page's data and re-indexes under the new name.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage(string $oldpage, string $newpage): void
    {
        $this->deletePage($oldpage, true);
        $this->addPage($newpage, true);
    }

    /**
     * Clear all page indexes
     *
     * Removes the index files of all registered metadata keys, the fulltext
     * indexes, the title and page indexes and finally the metadata registry
     * itself. Missing files are silently ignored.
     */
    public function clear(): void
    {
        global $conf;

        Lock::acquire('page');
        try {
            $dir = $conf['indexdir'];

            // clear metadata indexes; the registry has to be read before it is deleted below
            foreach ($this->getMetadataRegistryKeys() as $key) {
                $clean = PageMetaCollection::cleanName($key);
                foreach (['_w.idx', '_i.idx', '_p.idx'] as $suffix) {
                    @unlink($dir . '/' . $clean . $suffix);
                }
            }

            // clear fulltext indexes
            foreach (['/i*.idx', '/w*.idx'] as $pattern) {
                $files = glob($dir . $pattern);
                if ($files) foreach ($files as $f) @unlink($f);
            }

            @unlink($dir . '/pageword.idx');
            @unlink($dir . '/lengths.idx');

            // clear title and page indexes
            @unlink($dir . '/title.idx');
            @unlink($dir . '/page.idx');
            @unlink($dir . '/metadata.idx');
        } finally {
            Lock::release('page');
        }
    }

    /**
     * Check the structural integrity of all search indexes
     *
     * Checks the fulltext and title collections and the collection of every
     * registered metadata key.
     *
     * @throws IndexIntegrityException when a structural inconsistency is found
     */
    public function checkIntegrity(): void
    {
        (new PageFulltextCollection())->checkIntegrity();
        (new PageTitleCollection())->checkIntegrity();

        foreach ($this->getMetadataRegistryKeys() as $key) {
            (new PageMetaCollection($key))->checkIntegrity();
        }
    }

    /**
     * Whether the search index is empty (no fulltext data indexed yet)
     *
     * @return bool
     */
    public function isIndexEmpty(): bool
    {
        return (new PageFulltextCollection())->getTokenIndexMaximum() === 0;
    }

    /**
     * Get the list of known metadata keys from the metadata registry
     *
     * The registry is the file metadata.idx in the index directory, holding one key per line.
     *
     * @return string[] list of metadata key names, empty when the registry does not exist
     */
    protected function getMetadataRegistryKeys(): array
    {
        $fn = $this->metadataRegistryFile();
        if (!file_exists($fn)) return [];
        $keys = file($fn, FILE_IGNORE_NEW_LINES);
        return $keys ?: [];
    }

    /**
     * Update the metadata registry with new keys
     *
     * Keys already registered are left alone; the registry file is only
     * rewritten when at least one new key was added.
     *
     * @param string[] $keys metadata key names to ensure are registered
     */
    protected function updateMetadataRegistry(array $keys): void
    {
        $registered = $this->getMetadataRegistryKeys();

        $added = false;
        foreach ($keys as $key) {
            // array_keys() turns numeric keys into integers while the registry holds strings.
            // Compare strictly on strings: a loose comparison treats '10' and '1e1' as equal
            // and would silently skip registering one of them.
            $key = (string)$key;
            if (in_array($key, $registered, true)) continue;

            $registered[] = $key;
            $added = true;
        }

        if ($added) {
            io_saveFile($this->metadataRegistryFile(), implode("\n", $registered) . "\n");
        }
    }

    /**
     * Full path of the metadata registry file
     *
     * @return string
     */
    private function metadataRegistryFile(): string
    {
        global $conf;
        return $conf['indexdir'] . '/metadata.idx';
    }

    /**
     * Replace the tokens stored for a page in a collection
     *
     * Locks the collection, writes the tokens and unlocks again. The unlock is
     * done in a finally block so that an exception thrown while writing does
     * not leave the collection locked.
     *
     * @param object $collection a collection offering lock(), addEntity() and unlock()
     * @param string $page the page (entity) name
     * @param array $tokens the tokens to store, an empty list clears the page's data
     */
    private function storeEntity(object $collection, string $page, array $tokens): void
    {
        $collection->lock();
        try {
            $collection->addEntity($page, $tokens);
        } finally {
            $collection->unlock();
        }
    }

    // region Deprecated methods

    /**
     * Find pages containing a metadata value
     *
     * @param string $key metadata key name
     * @param string|string[] $value search term(s), passed on by reference
     * @param callable|null $func ignored, kept for backward compatibility
     * @return array
     *
     * @deprecated 2026-04-07 use MetadataSearch::lookupKey() instead
     */
    public function lookupKey($key, &$value, $func = null)
    {
        DebugHelper::dbgDeprecatedFunction(MetadataSearch::class . '::lookupKey()');
        return (new MetadataSearch())->lookupKey($key, $value);
    }

    /**
     * Return a list of all indexed pages, optionally filtered by metadata key
     *
     * @param string|null $key metadata key name
     * @return string[]
     *
     * @deprecated 2026-04-07 use MetadataSearch::getPages() or Indexer::getAllPages() instead
     */
    public function getPages($key = null)
    {
        DebugHelper::dbgDeprecatedFunction(MetadataSearch::class . '::getPages()');
        return (new MetadataSearch())->getPages($key);
    }

    /**
     * Add metadata values for a page
     *
     * The key 'title' is stored in the title collection. Every other key is
     * stored in its own metadata collection and added to the metadata registry.
     *
     * @param string $page page name
     * @param string $key metadata key name
     * @param string|string[]|null $value value(s) to add
     * @return bool false when the index could not be updated
     *
     * @deprecated 2026-04-07 use Collection classes directly instead
     */
    public function addMetaKeys($page, $key, $value = null)
    {
        DebugHelper::dbgDeprecatedFunction('Collection classes');
        try {
            $isTitle = $key === 'title';
            $collection = $isTitle ? new PageTitleCollection() : new PageMetaCollection($key);

            $values = is_array($value) ? $value : ($value !== null && $value !== '' ? [$value] : []);
            $this->storeEntity($collection, $page, $values);

            // the title lives in its own collection and must not show up as a metadata key,
            // this mirrors addPage() which drops the title before updating the registry
            if (!$isTitle) {
                $this->updateMetadataRegistry([$key]);
            }
            return true;
        } catch (SearchException) {
            return false;
        }
    }

    /**
     * Rename a metadata value in the index
     *
     * When the new value is not known yet, the token is simply renamed. When it
     * already exists, the frequency data of the old value is merged into the
     * new one and the reverse index of every affected entity is adjusted.
     *
     * @param string $key metadata key name
     * @param string $oldvalue old value
     * @param string $newvalue new value
     * @return bool false when the index could not be updated, true otherwise
     *              (also when the old value does not exist)
     *
     * @deprecated 2026-04-07 use Collection classes directly instead
     */
    public function renameMetaValue($key, $oldvalue, $newvalue)
    {
        DebugHelper::dbgDeprecatedFunction('Collection classes');
        try {
            $collection = new PageMetaCollection($key);
            $collection->lock();
            try {
                $tokenIndex = $collection->getTokenIndex();

                // find old value — search() is read-only, won't create entries
                $matches = $tokenIndex->search('/^' . preg_quote($oldvalue, '/') . '$/');
                if ($matches === []) {
                    return true;
                }
                $oldid = array_key_first($matches);

                // check if new value already exists (read-only lookup)
                $newMatches = $tokenIndex->search('/^' . preg_quote($newvalue, '/') . '$/');
                if ($newMatches === []) {
                    // new value doesn't exist — simple rename
                    $tokenIndex->changeRow($oldid, $newvalue);
                    return true;
                }

                // both values exist — merge frequency data from old to new
                $newid = array_key_first($newMatches);
                $freqIndex = $collection->getFrequencyIndex();
                $reverseIndex = $collection->getReverseIndex();
                $oldFreqLine = $freqIndex->retrieveRow($oldid);
                if ($oldFreqLine === '') {
                    return true;
                }

                $newFreqLine = $freqIndex->retrieveRow($newid);
                foreach (TupleOps::parseTuples($oldFreqLine) as $entityId => $count) {
                    $newFreqLine = TupleOps::updateTuple($newFreqLine, $entityId, $count);

                    // update reverse index: remove old token, add new
                    $reverseRow = $reverseIndex->retrieveRow((int)$entityId);
                    $keyline = explode(':', $reverseRow);
                    $keyline = array_diff($keyline, [(string)$oldid]);
                    if (!in_array((string)$newid, $keyline, true)) {
                        $keyline[] = $newid;
                    }
                    $reverseIndex->changeRow(
                        (int)$entityId,
                        implode(':', array_filter($keyline, fn($v) => $v !== ''))
                    );
                }
                // clear the old row first: when both ids are the same the merged line must win
                $freqIndex->changeRow($oldid, '');
                $freqIndex->changeRow($newid, $newFreqLine);

                return true;
            } finally {
                // always release the lock, also when one of the index operations failed
                $collection->unlock();
            }
        } catch (SearchException) {
            return false;
        }
    }

    /**
     * Get the page ID for a page name
     *
     * @param string $page page name
     * @return int|false false when the page index could not be accessed
     *
     * @deprecated 2026-04-07 use FileIndex directly instead
     */
    public function getPID($page)
    {
        DebugHelper::dbgDeprecatedFunction(FileIndex::class);
        try {
            return (new FileIndex('page', '', true))->accessCachedValue($page);
        } catch (SearchException) {
            return false;
        }
    }

    /**
     * Find tokens in the fulltext index
     *
     * Tokens that are not valid search terms are skipped and do not appear
     * in the result. Pages that no longer exist are filtered out.
     *
     * @param array $tokens list of words to search for
     * @return array list of pages found [word => [page => count, ...]]
     *
     * @deprecated 2026-04-07 use CollectionSearch on PageFulltextCollection instead
     */
    public function lookup($tokens)
    {
        DebugHelper::dbgDeprecatedFunction(CollectionSearch::class);
        $collection = new PageFulltextCollection();
        $search = new CollectionSearch($collection);
        $termMap = [];
        foreach ($tokens as $token) {
            if (!Tokenizer::isValidSearchTerm($token)) continue;
            $term = $search->addTerm($token);
            $termMap[$token] = $term;
        }

        if ($termMap === []) return [];
        $search->execute();

        $result = [];
        foreach ($termMap as $word => $term) {
            $freqs = $term->getEntityFrequencies();
            // filter to only existing pages
            $filtered = array_filter($freqs, fn($page) => page_exists($page, '', false), ARRAY_FILTER_USE_KEY);
            $result[$word] = $filtered;
        }
        return $result;
    }

    // endregion
}
536