<?php

namespace dokuwiki\Search\Collection;

/**
 * Wildcard-aware term lookup on a collection's token index
 *
 * Accepts any AbstractCollection (Frequency, Lookup, Direct). Indexes that are split
 * by token length and indexes that are not split are both handled here, so callers
 * do not need to care about the layout.
 *
 * Register terms (optionally with wildcards) through addTerm(), then run execute().
 * Case-insensitive matching can be switched on for the whole search via caseInsensitive()
 * or on individual terms.
 */
class CollectionSearch
{
    /** @var Term[] every registered term, keyed by its original term string */
    protected array $allTerms = [];

    /** @var array<int, Term[]> the same Term objects bucketed by index group (token length when split, 0 otherwise) */
    protected array $groupedTerms = [];

    /** @var ?int cached maximum token index suffix, fetched from the collection on first wildcard term */
    protected ?int $max = null;

    /** @var bool whether newly added terms are switched to case-insensitive matching */
    protected bool $defaultCaseInsensitive = false;

    /**
     * Set up a search that operates on the given collection
     *
     * @param AbstractCollection $collection The collection this search works on
     */
    public function __construct(protected AbstractCollection $collection)
    {
    }

    /**
     * Make every term added from now on match case-insensitively
     *
     * Terms that were added before this call are not touched.
     *
     * @return static
     */
    public function caseInsensitive(): static
    {
        $this->defaultCaseInsensitive = true;
        return $this;
    }

    /**
     * Register a term to be looked up when the search is executed
     *
     * @param string $term the search term, may include * wildcards at start/end
     * @return Term the internal representation of the term; it is only populated with
     *              matches once execute() has been run
     */
    public function addTerm(string $term): Term
    {
        $entry = new Term($term);
        if ($this->defaultCaseInsensitive) {
            $entry->caseInsensitive();
        }

        // registry of all terms; this is what execute() hands back
        $this->allTerms[$entry->getOriginal()] = $entry;

        // non-split layout: a single group 0 holds every term
        if (!$this->collection->isSplitByLength()) {
            $this->groupedTerms[0][] = $entry;
            return $entry;
        }

        // split layout: a plain term only needs the index of its own length, a wildcard
        // term has to be checked against every index from its length up to the maximum
        $shortest = $entry->getLength();
        $longest = $shortest;
        if ($entry->getWildcard()) {
            $this->max ??= $this->collection->getTokenIndexMaximum();
            $longest = $this->max;
        }

        for ($length = $shortest; $length <= $longest; $length++) {
            $this->groupedTerms[$length][] = $entry;
        }

        return $entry;
    }

    /**
     * Run the search for all registered terms
     *
     * Pass 1 walks each index group once, tests every token against all terms of that
     * group and asks the collection for the frequencies of the matching tokens. The
     * entity IDs seen along the way are then resolved to names in one batch through the
     * entity index. Pass 2 feeds entity name, token and frequency into each matching Term.
     *
     * @return Term[] All defined terms keyed by original term string
     */
    public function execute(): array
    {
        $entityIds = [];   // [entityId => true], used as a set
        $perGroup = [];    // list of [hits, frequencies] pairs, one per group with matches

        // Pass 1: scan tokens and resolve frequencies group by group
        foreach ($this->groupedTerms as $group => $terms) {
            $hits = $this->scanGroup($group, $terms);
            if ($hits === []) {
                continue;
            }

            $frequencies = $this->collection->resolveTokenFrequencies($group, array_keys($hits));

            // remember every entity ID for the batch name lookup below
            foreach ($frequencies as $byEntity) {
                $entityIds += array_fill_keys(array_keys($byEntity), true);
            }

            $perGroup[] = [$hits, $frequencies];
        }

        if ($entityIds === []) {
            return $this->allTerms;
        }

        // one batched lookup translating entity IDs to entity names
        $names = $this->collection->getEntityIndex()->retrieveRows(array_keys($entityIds));

        // Pass 2: hand the resolved data to the Terms
        foreach ($perGroup as [$hits, $frequencies]) {
            foreach ($frequencies as $tokenId => $byEntity) {
                foreach ($hits[$tokenId] as [$matchedTerm, $token]) {
                    foreach ($byEntity as $entityId => $frequency) {
                        $entityName = $names[$entityId] ?? '';
                        // entities without a resolvable name are skipped
                        if ($entityName !== '') {
                            $matchedTerm->addMatch($entityName, $token, $frequency);
                        }
                    }
                }
            }
        }

        return $this->allTerms;
    }

    /**
     * Walk the token index of one group once and test each token against all given terms
     *
     * @param int $group the index group (token length for split indexes, 0 otherwise)
     * @param Term[] $terms the terms registered for this group
     * @return array<int|string, array<int, array{0: Term, 1: string}>> [term, token] pairs keyed by token ID;
     *         empty when the group's token index does not exist or nothing matched
     */
    private function scanGroup(int $group, array $terms): array
    {
        $index = $this->collection->getTokenIndex($group);
        if (!$index->exists()) {
            return [];
        }

        $hits = [];
        foreach ($index as $tokenId => $token) {
            // empty rows carry no token
            if ($token === '') {
                continue;
            }
            foreach ($terms as $candidate) {
                if ($candidate->matches($token)) {
                    $hits[$tokenId][] = [$candidate, $token];
                }
            }
        }

        return $hits;
    }
}