<?php

namespace dokuwiki\Search;

use dokuwiki\Debug\DebugHelper;
use dokuwiki\Extension\Event;
use dokuwiki\Search\Collection\CollectionSearch;
use dokuwiki\Search\Collection\PageFulltextCollection;
use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Collection\PageTitleCollection;
use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexIntegrityException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;
use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Search\Index\FileIndex;
use dokuwiki\Search\Index\Lock;
use dokuwiki\Search\Index\MemoryIndex;
use dokuwiki\Search\Index\TupleOps;

// Version tag used to force rebuild on upgrade
const INDEXER_VERSION = 8;

/**
 * Class DokuWiki Indexer
 *
 * Manages the page search index by delegating to Collection classes.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer
{
    /** @var callable|null Logging callback, receives a string message */
    protected $logger;

    /**
     * Set a logging callback
     *
     * The callback receives a single string message. Use this to integrate
     * with different output mechanisms (TaskRunner echo, CLI output, Logger, etc.)
     *
     * @param callable $logger
     * @return static
     */
    public function setLogger(callable $logger): static
    {
        $this->logger = $logger;
        return $this;
    }

    /**
     * Send a message to the registered logger
     *
     * Does nothing when no logger has been registered.
     *
     * @param string $message
     */
    protected function log(string $message): void
    {
        if ($this->logger) {
            ($this->logger)($message);
        }
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once per request and memoized in a static local.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @return int|string
     */
    public function getVersion()
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            $data = ['dokuwiki' => $version];
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
            unset($data['dokuwiki']); // this needs to be first
            ksort($data);
            foreach ($data as $plugin => $vers) {
                $version .= '+' . $plugin . '=' . $vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Return a list of all indexed pages
     *
     * Empty rows of the page index are always dropped.
     *
     * @param bool $existsFilter only return pages that exist on disk
     * @return string[] list of page names (keys are the RIDs in the page index)
     */
    public function getAllPages(bool $existsFilter = false): array
    {
        $pageIndex = new MemoryIndex('page');
        return array_filter(
            iterator_to_array($pageIndex),
            static fn($v) => $v !== '' && (!$existsFilter || page_exists($v, '', false))
        );
    }

    /**
     * Check if a page needs (re-)indexing
     *
     * Indexing is needed when forced, when the page has no `.indexed` tag file,
     * when the tag was written by a different indexer version, or when the tag
     * is not newer than the page file.
     *
     * @param string $page
     * @param bool $force
     * @return bool true if indexing is needed
     */
    public function needsIndexing(string $page, bool $force = false): bool
    {
        $idxtag = metaFN($page, '.indexed');
        if ($force || !file_exists($idxtag)) return true;

        // getVersion() may return an int; compare as strings so that numeric
        // look-alikes such as "8.0" or "08" are not mistaken for version 8
        if (trim(io_readFile($idxtag)) !== (string)$this->getVersion()) return true;

        $last = @filemtime($idxtag);
        return $last <= @filemtime(wikiFN($page));
    }

    /**
     * Add/update the search index for a page
     *
     * Locking is handled internally.
     *
     * @triggers INDEXER_PAGE_ADD
     * Plugins may modify page, body and metadata. When the default action is
     * not prevented, the raw wiki text of the page is appended to the body.
     *
     * @param string $page The page to index
     * @param bool $force force reindexing even when the index is up to date
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function addPage(string $page, bool $force = false): void
    {
        if (!$this->needsIndexing($page, $force)) {
            $this->log("Indexer: index for {$page} up to date");
            return;
        }

        // create shared writable page index early so we can resolve the PID for plugins
        $pageIndex = new FileIndex('page', '', true);

        // prepare event data
        $data = [
            'page' => $page,
            'body' => '',
            'metadata' => [
                'title' => p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED),
                'relation_references' => array_keys(
                    p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED) ?? []
                ),
                'relation_media' => array_keys(
                    p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED) ?? []
                ),
                // fulltext indexing stays enabled unless the metadata is exactly false
                'internal_index' => p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED) !== false,
            ],
            'pid' => $pageIndex->accessCachedValue($page),
        ];

        // let plugins modify the data
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) {
            $data['body'] = $data['body'] . ' ' . rawWiki($data['page']);
        }
        $event->advise_after();
        unset($event);

        // index title
        $this->writeLocked(
            new PageTitleCollection($pageIndex),
            $data['page'],
            [$data['metadata']['title']]
        );
        unset($data['metadata']['title']);

        // index fulltext
        if ($data['metadata']['internal_index']) {
            $words = Tokenizer::getWords($data['body']);
        } else {
            $this->log("Indexer: full text indexing disabled for {$data['page']}");
            // clear any previously stored fulltext data
            $words = [];
        }
        $this->writeLocked(new PageFulltextCollection($pageIndex), $data['page'], $words);
        unset($data['metadata']['internal_index']);

        // index metadata keys
        foreach ($data['metadata'] as $key => $values) {
            if (!is_array($values)) {
                $values = ($values !== null && $values !== '') ? [$values] : [];
            }
            $this->writeLocked(new PageMetaCollection($key, $pageIndex), $data['page'], $values);
        }

        // update metadata registry
        $this->updateMetadataRegistry(array_keys($data['metadata']));

        // update index tag file
        io_saveFile(metaFN($data['page'], '.indexed'), $this->getVersion());
        $this->log("Indexer: finished indexing {$data['page']}");
    }

    /**
     * Remove a page from the index
     *
     * Clears the page's data from all collections. The entity persists in page.idx.
     *
     * @param string $page The page to remove
     * @param bool $force force deletion even when no .indexed tag exists
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function deletePage(string $page, bool $force = false): void
    {
        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            $this->log("Indexer: {$page}.indexed file does not exist, ignoring");
            return;
        }

        $pageIndex = new FileIndex('page', '', true);

        // storing an empty value list clears whatever was indexed before
        $this->writeLocked(new PageTitleCollection($pageIndex), $page, []);
        $this->writeLocked(new PageFulltextCollection($pageIndex), $page, []);

        foreach ($this->getMetadataRegistryKeys() as $key) {
            $this->writeLocked(new PageMetaCollection($key, $pageIndex), $page, []);
        }

        $this->log("Indexer: deleted {$page} from index");
        @unlink($idxtag);
    }

    /**
     * Rename a page in the search index
     *
     * The page must already have been moved on disk before calling this.
     * Clears the old page's data and re-indexes under the new name.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     *
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage(string $oldpage, string $newpage): void
    {
        $this->deletePage($oldpage, true);
        $this->addPage($newpage, true);
    }

    /**
     * Clear all page indexes
     *
     * Removes the index files below $conf['indexdir'] while holding the
     * 'page' lock. The per-page `.indexed` tag files are not touched here.
     */
    public function clear(): void
    {
        global $conf;

        Lock::acquire('page');
        try {
            // clear metadata indexes
            foreach ($this->getMetadataRegistryKeys() as $key) {
                $clean = PageMetaCollection::cleanName($key);
                @unlink($conf['indexdir'] . '/' . $clean . '_w.idx');
                @unlink($conf['indexdir'] . '/' . $clean . '_i.idx');
                @unlink($conf['indexdir'] . '/' . $clean . '_p.idx');
            }

            // clear fulltext indexes
            $files = glob($conf['indexdir'] . '/i*.idx');
            if ($files) foreach ($files as $f) @unlink($f);
            $files = glob($conf['indexdir'] . '/w*.idx');
            if ($files) foreach ($files as $f) @unlink($f);

            @unlink($conf['indexdir'] . '/pageword.idx');
            @unlink($conf['indexdir'] . '/lengths.idx');

            // clear title and page indexes
            @unlink($conf['indexdir'] . '/title.idx');
            @unlink($conf['indexdir'] . '/page.idx');
            @unlink($conf['indexdir'] . '/metadata.idx');
        } finally {
            // never leave the index locked, even if cleaning up failed half way
            Lock::release('page');
        }
    }

    /**
     * Check the structural integrity of all search indexes
     *
     * Checks the fulltext and title collections and one metadata collection
     * per key found in the metadata registry.
     *
     * @throws IndexIntegrityException when a structural inconsistency is found
     */
    public function checkIntegrity(): void
    {
        (new PageFulltextCollection())->checkIntegrity();
        (new PageTitleCollection())->checkIntegrity();

        foreach ($this->getMetadataRegistryKeys() as $key) {
            (new PageMetaCollection($key))->checkIntegrity();
        }
    }

    /**
     * Whether the search index is empty (no fulltext data indexed yet)
     *
     * @return bool
     */
    public function isIndexEmpty(): bool
    {
        return (new PageFulltextCollection())->getTokenIndexMaximum() === 0;
    }

    /**
     * Get the list of known metadata keys from the metadata registry
     *
     * The registry is the file metadata.idx in $conf['indexdir'], one key per line.
     *
     * @return string[] list of metadata key names, empty when the registry is missing or unreadable
     */
    protected function getMetadataRegistryKeys(): array
    {
        global $conf;
        $fn = $conf['indexdir'] . '/metadata.idx';
        if (!file_exists($fn)) return [];
        $keys = file($fn, FILE_IGNORE_NEW_LINES);
        return $keys ?: [];
    }

    /**
     * Update the metadata registry with new keys
     *
     * The registry file is only rewritten when at least one key was missing.
     *
     * @param string[] $keys metadata key names to ensure are registered
     */
    protected function updateMetadataRegistry(array $keys): void
    {
        global $conf;
        $fn = $conf['indexdir'] . '/metadata.idx';
        $existing = file_exists($fn) ? file($fn, FILE_IGNORE_NEW_LINES) : [];
        if (!$existing) $existing = [];

        $added = false;
        foreach ($keys as $key) {
            // array_keys() turns numeric string keys into ints, while the registry
            // holds strings. Compare strictly as strings: a loose comparison treats
            // numeric look-alikes such as '1e3' and '1000' as the same key.
            $key = (string)$key;
            if (!in_array($key, $existing, true)) {
                $existing[] = $key;
                $added = true;
            }
        }

        if ($added) {
            io_saveFile($fn, implode("\n", $existing) . "\n");
        }
    }

    /**
     * Store the values of an entity in a collection while holding its lock
     *
     * The lock is released even when storing fails, so a failed write does not
     * leave the collection locked.
     *
     * @param object $collection collection offering lock(), addEntity() and unlock()
     * @param string $page entity (page) name
     * @param array $values values to store; an empty list clears the entity's data
     */
    private function writeLocked(object $collection, string $page, array $values): void
    {
        $collection->lock();
        try {
            $collection->addEntity($page, $values);
        } finally {
            $collection->unlock();
        }
    }

    // region Deprecated methods

    /**
     * Find pages containing a metadata value
     *
     * @param string $key metadata key name
     * @param string|string[] $value search term(s)
     * @param callable|null $func comparison function
     * @return array
     *
     * @deprecated 2026-04-07 use MetadataSearch::lookupKey() instead
     */
    public function lookupKey($key, &$value, $func = null)
    {
        DebugHelper::dbgDeprecatedFunction(MetadataSearch::class . '::lookupKey()');
        return (new MetadataSearch())->lookupKey($key, $value, $func);
    }

    /**
     * Return a list of all indexed pages, optionally filtered by metadata key
     *
     * @param string|null $key metadata key name
     * @return string[]
     *
     * @deprecated 2026-04-07 use MetadataSearch::getPages() or Indexer::getAllPages() instead
     */
    public function getPages($key = null)
    {
        DebugHelper::dbgDeprecatedFunction(MetadataSearch::class . '::getPages()');
        return (new MetadataSearch())->getPages($key);
    }

    /**
     * Add metadata values for a page
     *
     * The key 'title' is stored in the title collection, every other key in
     * its own metadata collection.
     *
     * @param string $page page name
     * @param string $key metadata key name
     * @param string|string[]|null $value value(s) to add
     * @return bool|string true on success, false when the index could not be updated
     *
     * @deprecated 2026-04-07 use Collection classes directly instead
     */
    public function addMetaKeys($page, $key, $value = null)
    {
        DebugHelper::dbgDeprecatedFunction('Collection classes');
        try {
            if ($key === 'title') {
                $collection = new PageTitleCollection();
            } else {
                $collection = new PageMetaCollection($key);
            }
            $values = is_array($value) ? $value : ($value !== null && $value !== '' ? [$value] : []);
            $this->writeLocked($collection, $page, $values);

            // Titles live in their own collection. addPage() keeps 'title' out of
            // the registry as well, because every registered key is later opened
            // as a PageMetaCollection.
            if ($key !== 'title') {
                $this->updateMetadataRegistry([$key]);
            }
            return true;
        } catch (SearchException $e) {
            return false;
        }
    }

    /**
     * Rename a metadata value in the index
     *
     * When the new value is not known yet, the token is simply renamed. When
     * both values exist, the frequency data of the old value is merged into the
     * new one and the reverse index rows of the affected entities are updated.
     *
     * @param string $key metadata key name
     * @param string $oldvalue old value
     * @param string $newvalue new value
     * @return bool|string true on success (also when the old value is unknown), false on index errors
     *
     * @deprecated 2026-04-07 use Collection classes directly instead
     */
    public function renameMetaValue($key, $oldvalue, $newvalue)
    {
        DebugHelper::dbgDeprecatedFunction('Collection classes');
        try {
            $collection = new PageMetaCollection($key);
            $collection->lock();
            try {
                $tokenIndex = $collection->getTokenIndex(0);

                // find old value — search() is read-only, won't create entries
                $matches = $tokenIndex->search('/^' . preg_quote($oldvalue, '/') . '$/');
                if (empty($matches)) {
                    return true;
                }
                $oldid = array_key_first($matches);

                // check if new value already exists (read-only lookup)
                $newMatches = $tokenIndex->search('/^' . preg_quote($newvalue, '/') . '$/');

                if (!empty($newMatches)) {
                    // both values exist — merge frequency data from old to new
                    $newid = array_key_first($newMatches);
                    $freqIndex = $collection->getFrequencyIndex(0);
                    $reverseIndex = $collection->getReverseIndex();
                    $oldFreqLine = $freqIndex->retrieveRow($oldid);

                    if ($oldFreqLine !== '') {
                        $newFreqLine = $freqIndex->retrieveRow($newid);
                        foreach (TupleOps::parseTuples($oldFreqLine) as $entityId => $count) {
                            $newFreqLine = TupleOps::updateTuple($newFreqLine, $entityId, $count);

                            // update reverse index: remove old token, add new
                            $reverseRow = $reverseIndex->retrieveRow((int)$entityId);
                            $keyline = explode(':', $reverseRow);
                            $keyline = array_diff($keyline, [(string)$oldid]);
                            if (!in_array((string)$newid, $keyline, true)) {
                                $keyline[] = (string)$newid;
                            }
                            $reverseIndex->changeRow(
                                (int)$entityId,
                                implode(':', array_filter($keyline, static fn($v) => $v !== ''))
                            );
                        }
                        $freqIndex->changeRow($oldid, '');
                        $freqIndex->changeRow($newid, $newFreqLine);
                    }
                } else {
                    // new value doesn't exist — simple rename
                    $tokenIndex->changeRow($oldid, $newvalue);
                }

                return true;
            } finally {
                // runs on every exit path, including exceptions, so the lock is never leaked
                $collection->unlock();
            }
        } catch (SearchException $e) {
            return false;
        }
    }

    /**
     * Get the page ID for a page name
     *
     * @param string $page page name
     * @return int|false the page ID, false when the page index could not be accessed
     *
     * @deprecated 2026-04-07 use FileIndex directly instead
     */
    public function getPID($page)
    {
        DebugHelper::dbgDeprecatedFunction(FileIndex::class);
        try {
            return (new FileIndex('page', '', true))->accessCachedValue($page);
        } catch (SearchException $e) {
            return false;
        }
    }

    /**
     * Find tokens in the fulltext index
     *
     * Tokens rejected by the search are skipped. Only pages that exist are
     * included in the result.
     *
     * @param array $tokens list of words to search for
     * @return array list of pages found [word => [page => count, ...]]
     *
     * @deprecated 2026-04-07 use CollectionSearch on PageFulltextCollection instead
     */
    public function lookup(&$tokens)
    {
        DebugHelper::dbgDeprecatedFunction(CollectionSearch::class);
        $collection = new PageFulltextCollection();
        $search = new CollectionSearch($collection);
        $termMap = [];
        foreach ($tokens as $token) {
            try {
                $term = $search->addTerm($token);
                $termMap[$token] = $term;
            } catch (SearchException $e) {
                // skip invalid terms
            }
        }

        if (empty($termMap)) return [];
        $search->execute();

        $result = [];
        foreach ($termMap as $word => $term) {
            $freqs = $term->getEntityFrequencies();
            // filter to only existing pages
            $filtered = [];
            foreach ($freqs as $page => $count) {
                if (page_exists($page, '', false)) {
                    $filtered[$page] = $count;
                }
            }
            $result[$word] = $filtered;
        }
        return $result;
    }

    // endregion
}