xref: /dokuwiki/inc/Search/LegacyIndexer.php (revision 6e39b4e379a661a3abd765df49fa679d2119741c)
1*6e39b4e3SAndreas Gohr<?php
2*6e39b4e3SAndreas Gohr
3*6e39b4e3SAndreas Gohrnamespace dokuwiki\Search;
4*6e39b4e3SAndreas Gohr
5*6e39b4e3SAndreas Gohruse dokuwiki\Debug\DebugHelper;
6*6e39b4e3SAndreas Gohruse dokuwiki\Search\Collection\CollectionSearch;
7*6e39b4e3SAndreas Gohruse dokuwiki\Search\Collection\PageFulltextCollection;
8*6e39b4e3SAndreas Gohruse dokuwiki\Search\Collection\PageMetaCollection;
9*6e39b4e3SAndreas Gohruse dokuwiki\Search\Collection\PageTitleCollection;
10*6e39b4e3SAndreas Gohruse dokuwiki\Search\Exception\SearchException;
11*6e39b4e3SAndreas Gohruse dokuwiki\Search\Index\FileIndex;
12*6e39b4e3SAndreas Gohruse dokuwiki\Search\Index\TupleOps;
13*6e39b4e3SAndreas Gohr
/**
 * Backward-compatible wrapper around {@see Indexer}
 *
 * The refactored {@see Indexer} reports failures by throwing
 * {@see SearchException} subclasses. Plugins written against the legacy
 * Doku_Indexer API expect the four mutating methods (addPage, deletePage,
 * renamePage, clear) to return `true` on success or a string error message
 * on failure. This class wraps an {@see Indexer} instance and restores that
 * contract for those four methods. It also hosts the legacy helpers
 * (lookupKey, addMetaKeys, renameMetaValue, getPID, lookup). Every other
 * method (getVersion, getAllPages, getPages, needsIndexing, checkIntegrity,
 * isIndexEmpty, ...) is forwarded unchanged to the wrapped indexer.
 *
 * It is returned by the deprecated {@see \idx_get_indexer()} helper, which
 * is the entry point most plugins use to obtain an indexer instance. New
 * code should instantiate {@see Indexer} directly and handle
 * {@see SearchException} via try/catch.
 *
 * Composition (not inheritance) is used because PHP does not allow
 * overriding a `void` return type with `bool|string`.
 *
 * @deprecated 2026-04-07 use {@see Indexer} directly with try/catch
 *
 * @method string|int getVersion()
 * @method string[] getAllPages(bool $existsFilter = false)
 * @method string[] getPages(?string $key = null)
 * @method bool needsIndexing(string $page, bool $force = false)
 * @method void checkIntegrity()
 * @method bool isIndexEmpty()
 */
class LegacyIndexer
{
    /** @var Indexer the wrapped indexer that performs the actual work */
    protected Indexer $indexer;

    /**
     * @param Indexer|null $indexer indexer to wrap; a fresh default instance is created when omitted
     */
    public function __construct(?Indexer $indexer = null)
    {
        $this->indexer = $indexer ?? new Indexer();
    }

    /**
     * Forward any other call (getVersion, getAllPages, getPages, needsIndexing,
     * checkIntegrity, isIndexEmpty, ...) to the wrapped indexer.
     *
     * @param string $name name of the method to call on the wrapped indexer
     * @param array $args arguments passed on unchanged
     * @return mixed whatever the wrapped method returns
     *
     * @deprecated 2026-04-07 call the same method on {@see Indexer} directly
     */
    public function __call(string $name, array $args): mixed
    {
        DebugHelper::dbgDeprecatedFunction(Indexer::class . '::' . $name . '()');
        return $this->indexer->$name(...$args);
    }

    /**
     * Add or update a page in the index
     *
     * @param string $page page name
     * @param bool $force passed through to {@see Indexer::addPage()}
     * @return true|string true on success, error message on failure
     *
     * @deprecated 2026-04-07 use {@see Indexer::addPage()} with try/catch instead
     */
    public function addPage(string $page, bool $force = false): bool|string
    {
        // the deprecation notice is issued here (not in the helper) so the reported caller stays the same
        DebugHelper::dbgDeprecatedFunction(Indexer::class . '::addPage()');
        return $this->legacyResult(fn() => $this->indexer->addPage($page, $force));
    }

    /**
     * Remove a page from the index
     *
     * @param string $page page name
     * @param bool $force passed through to {@see Indexer::deletePage()}
     * @return true|string true on success, error message on failure
     *
     * @deprecated 2026-04-07 use {@see Indexer::deletePage()} with try/catch instead
     */
    public function deletePage(string $page, bool $force = false): bool|string
    {
        DebugHelper::dbgDeprecatedFunction(Indexer::class . '::deletePage()');
        return $this->legacyResult(fn() => $this->indexer->deletePage($page, $force));
    }

    /**
     * Rename a page in the index
     *
     * @param string $oldpage current page name
     * @param string $newpage new page name
     * @return true|string true on success, error message on failure
     *
     * @deprecated 2026-04-07 use {@see Indexer::renamePage()} with try/catch instead
     */
    public function renamePage(string $oldpage, string $newpage): bool|string
    {
        DebugHelper::dbgDeprecatedFunction(Indexer::class . '::renamePage()');
        return $this->legacyResult(fn() => $this->indexer->renamePage($oldpage, $newpage));
    }

    /**
     * Clear the index
     *
     * @return true|string true on success, error message on failure
     *
     * @deprecated 2026-04-07 use {@see Indexer::clear()} with try/catch instead
     */
    public function clear(): bool|string
    {
        DebugHelper::dbgDeprecatedFunction(Indexer::class . '::clear()');
        return $this->legacyResult(fn() => $this->indexer->clear());
    }

    /**
     * Run an indexer operation and translate its outcome to the legacy contract
     *
     * @param callable $operation the operation to run; its return value is ignored
     * @return true|string true when no SearchException was thrown, otherwise the exception message
     */
    private function legacyResult(callable $operation): bool|string
    {
        try {
            $operation();
            return true;
        } catch (SearchException $e) {
            return $e->getMessage();
        }
    }

    /**
     * Find pages containing a metadata value
     *
     * The value is taken by reference as in the legacy signature and handed
     * on to MetadataSearch::lookupKey() as is.
     *
     * @param string $key metadata key name
     * @param string|string[] $value search term(s)
     * @param callable|null $func ignored, kept for backward compatibility
     * @return array
     *
     * @deprecated 2026-04-07 use MetadataSearch::lookupKey() instead
     */
    public function lookupKey($key, &$value, $func = null)
    {
        DebugHelper::dbgDeprecatedFunction(MetadataSearch::class . '::lookupKey()');
        return (new MetadataSearch())->lookupKey($key, $value);
    }

    /**
     * Add metadata values for a page
     *
     * The key 'title' is stored in the PageTitleCollection, every other key in
     * a PageMetaCollection for that key. A null or empty string value results
     * in an empty value list being stored.
     *
     * @param string $page page name
     * @param string $key metadata key name
     * @param string|string[]|null $value value(s) to add
     * @return bool false when a SearchException occurred, true otherwise
     *
     * @deprecated 2026-04-07 use Collection classes directly instead
     */
    public function addMetaKeys($page, $key, $value = null)
    {
        DebugHelper::dbgDeprecatedFunction('Collection classes');
        try {
            $collection = $key === 'title' ? new PageTitleCollection() : new PageMetaCollection($key);

            if (is_array($value)) {
                $values = $value;
            } elseif ($value !== null && $value !== '') {
                $values = [$value];
            } else {
                $values = [];
            }

            $collection->lock();
            try {
                $collection->addEntity($page, $values);
            } finally {
                // always release the lock, even when adding the entity failed
                $collection->unlock();
            }

            $this->indexer->updateMetadataRegistry([$key]);
            return true;
        } catch (SearchException) {
            return false;
        }
    }

    /**
     * Rename a metadata value in the index
     *
     * When the old value is not indexed nothing is changed. When the new value
     * is not indexed yet, the token is simply renamed. When both exist, the
     * data of the old value is merged into the new one.
     *
     * @param string $key metadata key name
     * @param string $oldvalue old value
     * @param string $newvalue new value
     * @return bool false when a SearchException occurred, true otherwise
     *
     * @deprecated 2026-04-07 use Collection classes directly instead
     */
    public function renameMetaValue($key, $oldvalue, $newvalue)
    {
        DebugHelper::dbgDeprecatedFunction('Collection classes');
        try {
            $collection = new PageMetaCollection($key);
            $collection->lock();
            try {
                $tokenIndex = $collection->getTokenIndex();

                // find old value — search() is read-only, won't create entries
                $matches = $tokenIndex->search('/^' . preg_quote($oldvalue, '/') . '$/');
                if ($matches === []) {
                    return true;
                }
                $oldid = array_key_first($matches);

                // check if new value already exists (read-only lookup)
                $newMatches = $tokenIndex->search('/^' . preg_quote($newvalue, '/') . '$/');
                if ($newMatches === []) {
                    // new value doesn't exist — simple rename
                    $tokenIndex->changeRow($oldid, $newvalue);
                } else {
                    // both values exist — merge data from old to new
                    $this->mergeMetaValue($collection, $oldid, array_key_first($newMatches));
                }
                return true;
            } finally {
                // release the lock on every path, including when an index operation threw
                $collection->unlock();
            }
        } catch (SearchException) {
            return false;
        }
    }

    /**
     * Merge the index data of one metadata token into another
     *
     * Copies all frequency tuples of the old token to the new token, swaps the
     * token IDs in the reverse index rows of all affected entities and finally
     * empties the frequency row of the old token. The caller is expected to
     * hold the collection lock.
     *
     * @param PageMetaCollection $collection the locked collection to work on
     * @param int|string $oldid token ID of the value being replaced
     * @param int|string $newid token ID of the value to merge into
     */
    private function mergeMetaValue(PageMetaCollection $collection, int|string $oldid, int|string $newid): void
    {
        $freqIndex = $collection->getFrequencyIndex();
        $reverseIndex = $collection->getReverseIndex();

        $oldFreqLine = $freqIndex->retrieveRow($oldid);
        if ($oldFreqLine === '') {
            return;
        }

        $newFreqLine = $freqIndex->retrieveRow($newid);
        foreach (TupleOps::parseTuples($oldFreqLine) as $entityId => $count) {
            $newFreqLine = TupleOps::updateTuple($newFreqLine, $entityId, $count);

            // update reverse index: drop empty fields and the old token, add the new one
            $tokenIds = array_diff(
                explode(':', $reverseIndex->retrieveRow((int)$entityId)),
                ['', (string)$oldid]
            );
            if (!in_array((string)$newid, $tokenIds, true)) {
                $tokenIds[] = (string)$newid;
            }
            $reverseIndex->changeRow((int)$entityId, implode(':', $tokenIds));
        }

        $freqIndex->changeRow($oldid, '');
        $freqIndex->changeRow($newid, $newFreqLine);
    }

    /**
     * Get the page ID for a page name
     *
     * @param string $page page name
     * @return int|false false when a SearchException occurred
     *
     * @deprecated 2026-04-07 use FileIndex directly instead
     */
    public function getPID($page)
    {
        DebugHelper::dbgDeprecatedFunction(FileIndex::class);
        try {
            return (new FileIndex('page', '', true))->accessCachedValue($page);
        } catch (SearchException) {
            return false;
        }
    }

    /**
     * Find tokens in the fulltext index
     *
     * Tokens that are not valid search terms are skipped and do not show up in
     * the result. Pages that no longer exist are removed from the result.
     *
     * @param array $tokens list of words to search for
     * @return array list of pages found [word => [page => count, ...]]
     *
     * @deprecated 2026-04-07 use CollectionSearch on PageFulltextCollection instead
     */
    public function lookup($tokens)
    {
        DebugHelper::dbgDeprecatedFunction(CollectionSearch::class);
        $search = new CollectionSearch(new PageFulltextCollection());

        $termMap = [];
        foreach ($tokens as $token) {
            if (!Tokenizer::isValidSearchTerm($token)) {
                continue;
            }
            $termMap[$token] = $search->addTerm($token);
        }

        if ($termMap === []) {
            return [];
        }
        $search->execute();

        $result = [];
        foreach ($termMap as $word => $term) {
            // filter to only existing pages
            $result[$word] = array_filter(
                $term->getEntityFrequencies(),
                static fn($page) => page_exists($page, '', false),
                ARRAY_FILTER_USE_KEY
            );
        }
        return $result;
    }
}
292