<?php

namespace dokuwiki\Search;

use dokuwiki\Debug\DebugHelper;
use dokuwiki\Extension\Event;
use dokuwiki\Search\Collection\PageFulltextCollection;
use dokuwiki\Search\Collection\PageMetaCollection;
use dokuwiki\Search\Collection\PageTitleCollection;
use dokuwiki\Search\Exception\IndexAccessException;
use dokuwiki\Search\Exception\IndexIntegrityException;
use dokuwiki\Search\Exception\IndexLockException;
use dokuwiki\Search\Exception\IndexWriteException;
use dokuwiki\Search\Index\FileIndex;
use dokuwiki\Search\Index\Lock;
use dokuwiki\Search\Index\MemoryIndex;

// Version tag used to force rebuild on upgrade
const INDEXER_VERSION = 9;

/**
 * Class DokuWiki Indexer
 *
 * Manages the page search index by delegating to Collection classes.
 *
 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
class Indexer
{
    /** @var callable|null Logging callback, receives a string message */
    protected $logger;

    /**
     * Set a logging callback
     *
     * The callback receives a single string message. Use this to integrate
     * with different output mechanisms (TaskRunner echo, CLI output, Logger, etc.)
     *
     * @param callable $logger
     * @return static
     */
    public function setLogger(callable $logger): static
    {
        $this->logger = $logger;
        return $this;
    }

    /**
     * Send a message to the registered logger
     *
     * Does nothing when no logger has been registered.
     *
     * @param string $message
     */
    protected function log(string $message): void
    {
        if ($this->logger) {
            ($this->logger)($message);
        }
    }

    /**
     * Version of the indexer taking into consideration the external tokenizer.
     * The indexer is only compatible with data written by the same version.
     *
     * The result is computed once and memoised in a static variable.
     *
     * @triggers INDEXER_VERSION_GET
     * Plugins that modify what gets indexed should hook this event and
     * add their version info to the event data like so:
     *     $data[$plugin_name] = $plugin_version;
     *
     * @return int|string
     */
    public function getVersion(): int|string
    {
        static $indexer_version = null;
        if ($indexer_version === null) {
            $version = INDEXER_VERSION;

            $data = ['dokuwiki' => $version];
            Event::createAndTrigger('INDEXER_VERSION_GET', $data, null, false);
            unset($data['dokuwiki']); // this needs to be first
            ksort($data);
            foreach ($data as $plugin => $vers) {
                // appending turns the integer base version into a string
                $version .= '+' . $plugin . '=' . $vers;
            }
            $indexer_version = $version;
        }
        return $indexer_version;
    }

    /**
     * Return a list of all indexed pages
     *
     * Empty rows in the page index are skipped.
     *
     * @param bool $existsFilter only return pages that exist on disk
     * @return string[] list of page names (keys are the RIDs in the page index)
     */
    public function getAllPages(bool $existsFilter = false): array
    {
        $pageIndex = new MemoryIndex('page');
        return array_filter(
            iterator_to_array($pageIndex),
            static fn($v) => $v !== '' && (!$existsFilter || page_exists($v, '', false))
        );
    }

    /**
     * Check if a page needs (re-)indexing
     *
     * @param string $page
     * @param bool $force
     * @return bool true if indexing is needed
     */
    public function needsIndexing(string $page, bool $force = false): bool
    {
        $idxtag = metaFN($page, '.indexed');
        if ($force || !file_exists($idxtag)) return true;

        // compare as strings: a loose comparison would treat e.g. '09' or '9.0' as
        // identical to version 9 and skip a required rebuild
        if (trim(io_readFile($idxtag)) !== (string)$this->getVersion()) return true;

        // the index tag is written when the page is indexed; the page only needs
        // (re-)indexing if it was changed *after* that - an equal mtime means it was
        // saved and indexed within the same second and is therefore up to date
        $last = @filemtime($idxtag);
        return $last < @filemtime(wikiFN($page));
    }

    /**
     * Add/update the search index for a page
     *
     * Locking is handled internally.
     *
     * @triggers INDEXER_PAGE_ADD
     *
     * @param string $page The page to index
     * @param bool $force force reindexing even when the index is up to date
     *
     * @return bool true if the page was indexed, false if there was nothing to do
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function addPage(string $page, bool $force = false): bool
    {
        if (!$this->needsIndexing($page, $force)) {
            $this->log("Indexer: index for $page up to date");
            return false;
        }

        // create shared writable page index early so we can resolve the PID for plugins
        $pageIndex = new FileIndex('page', '', true);

        // prepare event data
        $data = [
            'page' => $page,
            'body' => '',
            'metadata' => [
                'title' => p_get_metadata($page, 'title', METADATA_RENDER_UNLIMITED),
                'relation_references' => array_keys(
                    p_get_metadata($page, 'relation references', METADATA_RENDER_UNLIMITED) ?? []
                ),
                'relation_media' => array_keys(
                    p_get_metadata($page, 'relation media', METADATA_RENDER_UNLIMITED) ?? []
                ),
                'internal_index' => p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED) !== false,
            ],
            'pid' => $pageIndex->accessCachedValue($page),
        ];

        // let plugins modify the data
        $event = new Event('INDEXER_PAGE_ADD', $data);
        if ($event->advise_before()) {
            $data['body'] = $data['body'] . ' ' . rawWiki($data['page']);
        }
        $event->advise_after();
        unset($event);

        // index title
        (new PageTitleCollection($pageIndex))->lock()
            ->addEntity($data['page'], [$data['metadata']['title']])->unlock();
        unset($data['metadata']['title']);

        // index fulltext
        if ($data['metadata']['internal_index']) {
            $words = Tokenizer::getWords($data['body']);
            (new PageFulltextCollection($pageIndex))->lock()->addEntity($data['page'], $words)->unlock();
        } else {
            $this->log("Indexer: full text indexing disabled for {$data['page']}");
            // clear any previously stored fulltext data
            (new PageFulltextCollection($pageIndex))->lock()->addEntity($data['page'], [])->unlock();
        }
        unset($data['metadata']['internal_index']);

        // index metadata keys; scalar values are wrapped, null/empty become "no values"
        foreach ($data['metadata'] as $key => $values) {
            if (!is_array($values)) {
                $values = ($values !== null && $values !== '') ? [$values] : [];
            }
            (new PageMetaCollection($key, $pageIndex))->lock()->addEntity($data['page'], $values)->unlock();
        }

        // update metadata registry
        $this->updateMetadataRegistry(array_keys($data['metadata']));

        // update index tag file
        io_saveFile(metaFN($data['page'], '.indexed'), $this->getVersion());
        $this->log("Indexer: finished indexing {$data['page']}");
        return true;
    }

    /**
     * Remove a page from the index
     *
     * Clears the page's data from all collections. The entity persists in page.idx.
     *
     * @param string $page The page to remove
     * @param bool $force force deletion even when no .indexed tag exists
     *
     * @return bool true if the page was removed, false if there was nothing to do
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function deletePage(string $page, bool $force = false): bool
    {
        $idxtag = metaFN($page, '.indexed');
        if (!$force && !file_exists($idxtag)) {
            $this->log("Indexer: $page.indexed file does not exist, ignoring");
            return false;
        }

        $pageIndex = new FileIndex('page', '', true);

        // storing an empty value list clears whatever was indexed for the page
        (new PageTitleCollection($pageIndex))->lock()->addEntity($page, [])->unlock();
        (new PageFulltextCollection($pageIndex))->lock()->addEntity($page, [])->unlock();

        foreach ($this->getMetadataRegistryKeys() as $key) {
            (new PageMetaCollection($key, $pageIndex))->lock()->addEntity($page, [])->unlock();
        }

        $this->log("Indexer: deleted $page from index");
        @unlink($idxtag);
        return true;
    }

    /**
     * Rename a page in the search index
     *
     * This renames the page's entity entry in place: its entity ID (the row in the
     * page index) is kept and only its name is changed. Because every collection
     * (title, fulltext and all metadata keys such as relation_references) is keyed by
     * that entity ID, all token, frequency and reverse associations are preserved and
     * transparently belong to the new name afterwards.
     *
     * In particular this keeps the renamed page's *outgoing* references intact. That is
     * essential during multi-step operations such as namespace moves: a page renamed
     * early on must still be discoverable as a backlink source for pages that are moved
     * later. Re-indexing from disk instead would lose this, because the destination page
     * has usually not been written to disk yet when this method is called.
     *
     * The page index is unlocked again even when one of the index operations throws.
     *
     * @param string $oldpage The old page name
     * @param string $newpage The new page name
     *
     * @return bool true if the page was renamed, false if there was nothing to do
     * @throws IndexAccessException
     * @throws IndexLockException
     * @throws IndexWriteException
     */
    public function renamePage(string $oldpage, string $newpage): bool
    {
        if ($oldpage === $newpage) return false;

        $pageIndex = new FileIndex('page', '', true);

        $oldId = null;
        $newId = null;
        try {
            // locate the existing entity rows; stop as soon as both are known
            foreach ($pageIndex as $rid => $value) {
                if ($value === $oldpage) $oldId = $rid;
                if ($value === $newpage) $newId = $rid;
                if ($oldId !== null && $newId !== null) break;
            }

            if ($oldId !== null) {
                // If the new name already has its own entity, drop its indexed data first.
                // deletePage() intentionally keeps the entity row in page.idx, so we additionally
                // blank that row - an empty entry is the index's "removed" marker (see getAllPages()).
                // Otherwise two rows would carry the new name and a lookup could resolve to the
                // now-empty one instead of the renamed entity that holds the data.
                if ($newId !== null) {
                    $this->deletePage($newpage, true);
                    $pageIndex->changeRow($newId, '');
                }

                // rename in place — keeps the entity ID and thus all index associations
                $pageIndex->changeRow($oldId, $newpage);
            }
        } finally {
            // always give the page index back, also when an index operation failed
            $pageIndex->unlock();
        }

        // nothing to rename if the old page was never indexed
        if ($oldId === null) {
            $this->log("Indexer: $oldpage is not in the index, nothing to rename");
            return false;
        }

        $this->log("Indexer: renamed $oldpage to $newpage in index");
        return true;
    }

    /**
     * Clear all page indexes
     *
     * The 'page' lock is held while the index files are removed and is released
     * again even if removing them fails with an exception.
     */
    public function clear(): void
    {
        global $conf;

        Lock::acquire('page');

        try {
            // clear metadata indexes
            foreach ($this->getMetadataRegistryKeys() as $key) {
                $clean = PageMetaCollection::cleanName($key);
                @unlink($conf['indexdir'] . '/' . $clean . '_w.idx');
                @unlink($conf['indexdir'] . '/' . $clean . '_i.idx');
                @unlink($conf['indexdir'] . '/' . $clean . '_p.idx');
            }

            // clear fulltext indexes
            $files = glob($conf['indexdir'] . '/i*.idx');
            if ($files) foreach ($files as $f) @unlink($f);
            $files = glob($conf['indexdir'] . '/w*.idx');
            if ($files) foreach ($files as $f) @unlink($f);

            @unlink($conf['indexdir'] . '/pageword.idx');
            @unlink($conf['indexdir'] . '/lengths.idx');

            // clear title and page indexes; the registry goes last because it was
            // needed above to find the metadata index files
            @unlink($conf['indexdir'] . '/title.idx');
            @unlink($conf['indexdir'] . '/page.idx');
            @unlink($conf['indexdir'] . '/metadata.idx');
        } finally {
            Lock::release('page');
        }
    }

    /**
     * Check the structural integrity of all search indexes
     *
     * Checks the fulltext and title collections and one metadata collection per
     * key listed in the metadata registry.
     *
     * @throws IndexIntegrityException when a structural inconsistency is found
     */
    public function checkIntegrity(): void
    {
        (new PageFulltextCollection())->checkIntegrity();
        (new PageTitleCollection())->checkIntegrity();

        foreach ($this->getMetadataRegistryKeys() as $key) {
            (new PageMetaCollection($key))->checkIntegrity();
        }
    }

    /**
     * Whether the search index is empty (no fulltext data indexed yet)
     *
     * @return bool
     */
    public function isIndexEmpty(): bool
    {
        return (new PageFulltextCollection())->getTokenIndexMaximum() === 0;
    }

    /**
     * Get the list of known metadata keys from the metadata registry
     *
     * The registry is the file metadata.idx in the index directory, one key per line.
     *
     * @return string[] list of metadata key names, empty if the registry is missing or unreadable
     */
    protected function getMetadataRegistryKeys(): array
    {
        global $conf;
        $fn = $conf['indexdir'] . '/metadata.idx';
        if (!file_exists($fn)) return [];
        $keys = file($fn, FILE_IGNORE_NEW_LINES);
        return $keys ?: [];
    }

    /**
     * Update the metadata registry with new keys
     *
     * Keys that are not registered yet are appended; the registry file is only
     * rewritten when at least one key was added.
     *
     * @param string[] $keys metadata key names to ensure are registered
     *
     * @internal Only marked public for access via LegacyIndexer
     */
    public function updateMetadataRegistry(array $keys): void
    {
        global $conf;
        $fn = $conf['indexdir'] . '/metadata.idx';
        $existing = file_exists($fn) ? file($fn, FILE_IGNORE_NEW_LINES) : [];
        if (!$existing) $existing = [];

        $added = false;
        foreach ($keys as $key) {
            // registry lines are strings; compare strictly so numeric-looking keys
            // such as '1e3' and '1000' are not mistaken for one another
            $key = (string)$key;
            if (!in_array($key, $existing, true)) {
                $existing[] = $key;
                $added = true;
            }
        }

        if ($added) {
            io_saveFile($fn, implode("\n", $existing) . "\n");
        }
    }

    /**
     * Return a list of all indexed pages, optionally filtered by metadata key
     *
     * Kept on Indexer (not just LegacyIndexer) because several plugins call it
     * directly on `new Indexer()` instances rather than going through
     * idx_get_indexer().
     *
     * @param string|null $key metadata key name
     * @return string[]
     *
     * @deprecated 2026-04-07 use MetadataSearch::getPages() or Indexer::getAllPages() instead
     */
    public function getPages($key = null)
    {
        DebugHelper::dbgDeprecatedFunction(MetadataSearch::class . '::getPages()');
        return (new MetadataSearch())->getPages($key);
    }
}