<?php

namespace dokuwiki\Search;

use dokuwiki\Debug\DebugHelper;
use dokuwiki\Extension\Event;
use dokuwiki\Search\Collection\CollectionSearch;
use dokuwiki\Search\Collection\PageFulltextCollection;
use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Collection\PageTitleCollection;
use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexIntegrityException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;
use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Search\Index\FileIndex;
use dokuwiki\Search\Index\Lock;
use dokuwiki\Search\Index\MemoryIndex;
use dokuwiki\Search\Index\TupleOps;

// Version tag used to force rebuild on upgrade
const INDEXER_VERSION = 8;

/**
 * Class DokuWiki Indexer
 *
 * Manages the page search index by delegating to Collection classes.
 *
 * The page title, the fulltext and every metadata key are stored in their own
 * collection. The names of all metadata keys that were ever indexed are kept
 * in the "metadata.idx" registry file so they can be found again when a page
 * is deleted, the index is cleared or its integrity is checked.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer
{
    /** @var callable|null Logging callback, receives a string message */
    protected $logger;

    /**
     * Set a logging callback
     *
     * The callback receives a single string message. Use this to integrate
     * with different output mechanisms (TaskRunner echo, CLI output, Logger, etc.)
     *
     * @param callable $logger
     * @return static
     */
    public function setLogger(callable $logger): static
    {
        $this->logger = $logger;
        return $this;
    }

    /**
     * Send a message to the registered logger
     *
     * Does nothing when no logger has been registered.
     *
     * @param string $message
     */
    protected function log(string $message): void
    {
        if ($this->logger) {
            ($this->logger)($message);
        }
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and cached in a static variable.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @return int|string the plain INDEXER_VERSION, or that version followed by
     *                    "+plugin=version" for each plugin entry (sorted by plugin name)
     */
    public function getVersion(): int|string
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            $data = ['dokuwiki' => $version];
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
            unset($data['dokuwiki']); // this needs to be first
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+' . $plugin . '=' . $vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Return a list of all indexed pages
     *
     * Empty entries of the page index are always skipped.
     *
     * @param bool $existsFilter only return pages that exist on disk
     * @return string[] list of page names (keys are the RIDs in the page index)
     */
    public function getAllPages(bool $existsFilter = false): array
    {
        $pageIndex = new MemoryIndex('page');
        return array_filter(
            iterator_to_array($pageIndex),
            static fn($v) => $v !== '' && (!$existsFilter || page_exists($v, '', false))
        );
    }

    /**
     * Check if a page needs (re-)indexing
     *
     * Indexing is needed when it is forced, when the page has no ".indexed" tag
     * file, when the tag was written by a different indexer version, or when the
     * tag is not newer than the page file.
     *
     * @param string $page
     * @param bool $force
     * @return bool true if indexing is needed
     */
    public function needsIndexing(string $page, bool $force = false): bool
    {
        $idxtag = metaFN($page, '.indexed');
        if ($force || !file_exists($idxtag)) return true;

        // the tag file holds the version as text, so compare string against string
        if (trim(io_readFile($idxtag)) !== (string)$this->getVersion()) return true;

        $last = @filemtime($idxtag);
        return $last <= @filemtime(wikiFN($page));
    }

    /**
     * Replace the data stored for a page in one collection
     *
     * The collection is locked for the write and the lock is released again even
     * when writing fails, so a failed write does not leave the lock behind.
     *
     * NOTE(review): assumes unlock() only releases the lock and is safe to call
     * after a failed addEntity() - confirm against the Collection classes.
     *
     * @param object $collection collection offering lock(), addEntity() and unlock()
     * @param string $page the page whose data is replaced
     * @param array $values the new values, an empty list clears the stored data
     */
    protected function writeEntity($collection, string $page, array $values): void
    {
        $collection->lock();
        try {
            $collection->addEntity($page, $values);
        } finally {
            $collection->unlock();
        }
    }

    /**
     * Add/update the search index for a page
     *
     * Locking is handled internally.
     *
     * @triggers INDEXER_PAGE_ADD
     *
     * @param string $page The page to index
     * @param bool $force force reindexing even when the index is up to date
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function addPage(string $page, bool $force = false): void
    {
        if (!$this->needsIndexing($page, $force)) {
            $this->log("Indexer: index for $page up to date");
            return;
        }

        // create shared writable page index early so we can resolve the PID for plugins
        $pageIndex = new FileIndex('page', '', true);

        // prepare event data
        $data = [
            'page' => $page,
            'body' => '',
            'metadata' => [
                'title' => p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED),
                'relation_references' => array_keys(
                    p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED) ?? []
                ),
                'relation_media' => array_keys(
                    p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED) ?? []
                ),
                'internal_index' => p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED) !== false,
            ],
            'pid' => $pageIndex->accessCachedValue($page),
        ];

        // let plugins modify the data; the raw wiki text is only appended
        // when no plugin prevented the default action
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) {
            $data['body'] = $data['body'] . ' ' . rawWiki($data['page']);
        }
        $event->advise_after();
        unset($event);

        // index title; it has its own collection and is not a registry key
        $this->writeEntity(new PageTitleCollection($pageIndex), $data['page'], [$data['metadata']['title']]);
        unset($data['metadata']['title']);

        // index fulltext
        if ($data['metadata']['internal_index']) {
            $words = Tokenizer::getWords($data['body']);
            $this->writeEntity(new PageFulltextCollection($pageIndex), $data['page'], $words);
        } else {
            $this->log("Indexer: full text indexing disabled for {$data['page']}");
            // clear any previously stored fulltext data
            $this->writeEntity(new PageFulltextCollection($pageIndex), $data['page'], []);
        }
        unset($data['metadata']['internal_index']);

        // index metadata keys; scalar values become a one element list,
        // null and the empty string clear the key for this page
        foreach ($data['metadata'] as $key => $values) {
            if (!is_array($values)) {
                $values = ($values !== null && $values !== '') ? [$values] : [];
            }
            $this->writeEntity(new PageMetaCollection($key, $pageIndex), $data['page'], $values);
        }

        // update metadata registry
        $this->updateMetadataRegistry(array_keys($data['metadata']));

        // update index tag file
        io_saveFile(metaFN($data['page'], '.indexed'), $this->getVersion());
        $this->log("Indexer: finished indexing {$data['page']}");
    }

    /**
     * Remove a page from the index
     *
     * Clears the page's data from all collections. The entity persists in page.idx.
     *
     * @param string $page The page to remove
     * @param bool $force force deletion even when no .indexed tag exists
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function deletePage(string $page, bool $force = false): void
    {
        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            $this->log("Indexer: $page.indexed file does not exist, ignoring");
            return;
        }

        $pageIndex = new FileIndex('page', '', true);

        // writing an empty value list clears whatever was stored for the page
        $this->writeEntity(new PageTitleCollection($pageIndex), $page, []);
        $this->writeEntity(new PageFulltextCollection($pageIndex), $page, []);

        foreach ($this->getMetadataRegistryKeys() as $key) {
            $this->writeEntity(new PageMetaCollection($key, $pageIndex), $page, []);
        }

        $this->log("Indexer: deleted $page from index");
        @unlink($idxtag);
    }

    /**
     * Rename a page in the search index
     *
     * The page must already have been moved on disk before calling this.
     * Clears the old page's data and re-indexes under the new name.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage(string $oldpage, string $newpage): void
    {
        $this->deletePage($oldpage, true);
        $this->addPage($newpage, true);
    }

    /**
     * Clear all page indexes
     *
     * Deletes the metadata, fulltext, title and page index files as well as the
     * metadata registry from the index directory while holding the 'page' lock.
     * The lock is released again even if the cleanup is interrupted by an error.
     */
    public function clear(): void
    {
        global $conf;

        Lock::acquire('page');
        try {
            // clear metadata indexes
            foreach ($this->getMetadataRegistryKeys() as $key) {
                $clean = PageMetaCollection::cleanName($key);
                @unlink($conf['indexdir'] . '/' . $clean . '_w.idx');
                @unlink($conf['indexdir'] . '/' . $clean . '_i.idx');
                @unlink($conf['indexdir'] . '/' . $clean . '_p.idx');
            }

            // clear fulltext indexes
            $files = glob($conf['indexdir'] . '/i*.idx');
            if ($files) foreach ($files as $f) @unlink($f);
            $files = glob($conf['indexdir'] . '/w*.idx');
            if ($files) foreach ($files as $f) @unlink($f);

            @unlink($conf['indexdir'] . '/pageword.idx');
            @unlink($conf['indexdir'] . '/lengths.idx');

            // clear title and page indexes
            @unlink($conf['indexdir'] . '/title.idx');
            @unlink($conf['indexdir'] . '/page.idx');
            @unlink($conf['indexdir'] . '/metadata.idx');
        } finally {
            Lock::release('page');
        }
    }

    /**
     * Check the structural integrity of all search indexes
     *
     * Checks the fulltext collection, the title collection and the collection
     * of every metadata key listed in the metadata registry.
     *
     * @throws IndexIntegrityException when a structural inconsistency is found
     */
    public function checkIntegrity(): void
    {
        (new PageFulltextCollection())->checkIntegrity();
        (new PageTitleCollection())->checkIntegrity();

        foreach ($this->getMetadataRegistryKeys() as $key) {
            (new PageMetaCollection($key))->checkIntegrity();
        }
    }

    /**
     * Whether the search index is empty (no fulltext data indexed yet)
     *
     * @return bool
     */
    public function isIndexEmpty(): bool
    {
        return (new PageFulltextCollection())->getTokenIndexMaximum() === 0;
    }

    /**
     * Get the list of known metadata keys from the metadata registry
     *
     * The registry is the "metadata.idx" file in the index directory, holding
     * one key per line.
     *
     * @return string[] list of metadata key names, empty when there is no registry yet
     */
    protected function getMetadataRegistryKeys(): array
    {
        global $conf;
        $fn = $conf['indexdir'] . '/metadata.idx';
        if (!file_exists($fn)) return [];
        $keys = file($fn, FILE_IGNORE_NEW_LINES);
        return $keys ?: [];
    }

    /**
     * Update the metadata registry with new keys
     *
     * Keys already listed are left alone; the registry file is only rewritten
     * when at least one new key was added.
     *
     * @param string[] $keys metadata key names to ensure are registered
     */
    protected function updateMetadataRegistry(array $keys): void
    {
        global $conf;
        $fn = $conf['indexdir'] . '/metadata.idx';
        $existing = file_exists($fn) ? file($fn, FILE_IGNORE_NEW_LINES) : [];
        if (!$existing) $existing = [];

        $added = false;
        foreach ($keys as $key) {
            // registry lines are strings; compare strictly so numeric looking
            // names such as "1e1" and "10" are kept apart
            $key = (string)$key;
            if (!in_array($key, $existing, true)) {
                $existing[] = $key;
                $added = true;
            }
        }

        if ($added) {
            io_saveFile($fn, implode("\n", $existing) . "\n");
        }
    }

    // region Deprecated methods

    /**
     * Find pages containing a metadata value
     *
     * @param string $key metadata key name
     * @param string|string[] $value search term(s)
     * @param callable|null $func ignored, kept for backward compatibility
     * @return array
     *
     * @deprecated 2026-04-07 use MetadataSearch::lookupKey() instead
     */
    public function lookupKey($key, &$value, $func = null)
    {
        DebugHelper::dbgDeprecatedFunction(MetadataSearch::class . '::lookupKey()');
        return (new MetadataSearch())->lookupKey($key, $value);
    }

    /**
     * Return a list of all indexed pages, optionally filtered by metadata key
     *
     * @param string|null $key metadata key name
     * @return string[]
     *
     * @deprecated 2026-04-07 use MetadataSearch::getPages() or Indexer::getAllPages() instead
     */
    public function getPages($key = null)
    {
        DebugHelper::dbgDeprecatedFunction(MetadataSearch::class . '::getPages()');
        return (new MetadataSearch())->getPages($key);
    }

    /**
     * Add metadata values for a page
     *
     * The key 'title' is stored in the title collection, every other key in its
     * own metadata collection. Only the latter are recorded in the metadata
     * registry, matching what addPage() does.
     *
     * @param string $page page name
     * @param string $key metadata key name
     * @param string|string[]|null $value value(s) to add
     * @return bool false when a SearchException occurred, true otherwise
     *
     * @deprecated 2026-04-07 use Collection classes directly instead
     */
    public function addMetaKeys($page, $key, $value = null)
    {
        DebugHelper::dbgDeprecatedFunction('Collection classes');
        try {
            if ($key === 'title') {
                $collection = new PageTitleCollection();
            } else {
                $collection = new PageMetaCollection($key);
            }
            $values = is_array($value) ? $value : ($value !== null && $value !== '' ? [$value] : []);

            // releases the lock even when the write fails and false is returned
            $this->writeEntity($collection, $page, $values);

            // the title is not a metadata collection, keep it out of the registry
            if ($key !== 'title') {
                $this->updateMetadataRegistry([$key]);
            }
            return true;
        } catch (SearchException) {
            return false;
        }
    }

    /**
     * Rename a metadata value in the index
     *
     * When the new value is not known yet, the token is renamed in place. When it
     * already exists, the frequency data of the old value is merged into the new
     * one and the reverse index of every affected entity is updated.
     * Renaming a value that is not in the index counts as success.
     *
     * @param string $key metadata key name
     * @param string $oldvalue old value
     * @param string $newvalue new value
     * @return bool false when a SearchException occurred, true otherwise
     *
     * @deprecated 2026-04-07 use Collection classes directly instead
     */
    public function renameMetaValue($key, $oldvalue, $newvalue)
    {
        DebugHelper::dbgDeprecatedFunction('Collection classes');
        try {
            $collection = new PageMetaCollection($key);
            $collection->lock();

            try {
                $tokenIndex = $collection->getTokenIndex();

                // find old value — search() is read-only, won't create entries
                $matches = $tokenIndex->search('/^' . preg_quote($oldvalue, '/') . '$/');
                if ($matches === []) {
                    return true;
                }
                $oldid = array_key_first($matches);

                // check if new value already exists (read-only lookup)
                $newMatches = $tokenIndex->search('/^' . preg_quote($newvalue, '/') . '$/');

                if ($newMatches !== []) {
                    // both values exist — merge frequency data from old to new
                    $newid = array_key_first($newMatches);
                    $freqIndex = $collection->getFrequencyIndex();
                    $reverseIndex = $collection->getReverseIndex();
                    $oldFreqLine = $freqIndex->retrieveRow($oldid);

                    if ($oldFreqLine !== '') {
                        $newFreqLine = $freqIndex->retrieveRow($newid);
                        foreach (TupleOps::parseTuples($oldFreqLine) as $entityId => $count) {
                            $newFreqLine = TupleOps::updateTuple($newFreqLine, $entityId, $count);

                            // update reverse index: remove old token, add new
                            $reverseRow = $reverseIndex->retrieveRow((int)$entityId);
                            $keyline = explode(':', $reverseRow);
                            $keyline = array_diff($keyline, [(string)$oldid]);
                            if (!in_array((string)$newid, $keyline)) {
                                $keyline[] = $newid;
                            }
                            $reverseIndex->changeRow(
                                (int)$entityId,
                                implode(':', array_filter($keyline, static fn($v) => $v !== ''))
                            );
                        }
                        $freqIndex->changeRow($oldid, '');
                        $freqIndex->changeRow($newid, $newFreqLine);
                    }
                } else {
                    // new value doesn't exist — simple rename
                    $tokenIndex->changeRow($oldid, $newvalue);
                }

                return true;
            } finally {
                // always give the lock back, also when an index operation failed
                $collection->unlock();
            }
        } catch (SearchException) {
            return false;
        }
    }

    /**
     * Get the page ID for a page name
     *
     * @param string $page page name
     * @return int|false false when a SearchException occurred
     *
     * @deprecated 2026-04-07 use FileIndex directly instead
     */
    public function getPID($page)
    {
        DebugHelper::dbgDeprecatedFunction(FileIndex::class);
        try {
            return (new FileIndex('page', '', true))->accessCachedValue($page);
        } catch (SearchException) {
            return false;
        }
    }

    /**
     * Find tokens in the fulltext index
     *
     * Tokens that are no valid search terms are skipped and do not show up in
     * the result. Pages that no longer exist are removed from the result.
     *
     * @param array $tokens list of words to search for
     * @return array list of pages found [word => [page => count, ...]]
     *
     * @deprecated 2026-04-07 use CollectionSearch on PageFulltextCollection instead
     */
    public function lookup($tokens)
    {
        DebugHelper::dbgDeprecatedFunction(CollectionSearch::class);
        $collection = new PageFulltextCollection();
        $search = new CollectionSearch($collection);
        $termMap = [];
        foreach ($tokens as $token) {
            if (!Tokenizer::isValidSearchTerm($token)) continue;
            $term = $search->addTerm($token);
            $termMap[$token] = $term;
        }

        if ($termMap === []) return [];
        $search->execute();

        $result = [];
        foreach ($termMap as $word => $term) {
            $freqs = $term->getEntityFrequencies();
            // filter to only existing pages
            $filtered = array_filter(
                $freqs,
                static fn($page) => page_exists($page, '', false),
                ARRAY_FILTER_USE_KEY
            );
            $result[$word] = $filtered;
        }
        return $result;
    }

    // endregion
}