<?php

namespace dokuwiki\Search\Collection;

use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Search\Tokenizer;

/**
 * Represents a term that is searched on a frequency based index
 *
 * A term can contain wildcards and thus may refer to various tokens of different lengths.
 * Wildcards are only recognized as a leading and/or trailing '*' on the term.
 */
class Term
{
    /** The term has no wildcard */
    public const WILDCARD_NONE = 0;
    /** The term starts with a wildcard (bit flag) */
    public const WILDCARD_START = 1;
    /** The term ends with a wildcard (bit flag) */
    public const WILDCARD_END = 2;

    /** @var string the original term including wildcard chars */
    protected string $original;

    /** @var string the base of the term without wildcard chars */
    protected string $base;

    /** @var string the quoted term to be used in a regular expression */
    protected string $quoted;

    /** @var int the length of the base term (not counting wildcards) */
    protected int $length;

    /** @var int The type of wildcards, a combination of the WILDCARD_* flags */
    protected int $wildcard;

    /** @var array<int, array<int, string>> The matching tokens for this term, keyed by group then token ID */
    protected array $tokens = [];

    /**
     * @var array<int|string, int> The entity frequencies this term matches (aggregated over all tokens),
     *                             keyed by entity ID or name
     */
    protected array $frequencies = [];

    /**
     * Parse the given term into its base, its wildcard flags and a regular expression fragment
     *
     * @param string $term The term as entered, optionally with a leading and/or trailing '*'
     * @throws SearchException when the base term is empty, or shorter than the minimum word length
     *                         while the term is not numeric
     */
    public function __construct(string $term)
    {
        $this->original = $term;
        $this->base = trim($term, '*');
        $this->quoted = preg_quote_cb($this->base);
        $this->wildcard = self::WILDCARD_NONE;
        $this->length = Tokenizer::tokenLength($this->base);

        // a leading wildcard allows any prefix in front of the base
        if (substr($term, 0, 1) === '*') {
            $this->quoted = '.*' . $this->quoted;
            $this->wildcard |= self::WILDCARD_START;
        }

        // a trailing wildcard allows any suffix after the base
        if (substr($term, -1, 1) === '*') {
            $this->quoted .= '.*';
            $this->wildcard |= self::WILDCARD_END;
        }

        // ignore terms that are too short, with an exception on numbers
        if ($this->length === 0 || ($this->length < Tokenizer::getMinWordLength() && !is_numeric($term))) {
            throw new SearchException('Too short term');
        }
    }

    /**
     * @return string the original term including wildcard chars
     */
    public function getOriginal(): string
    {
        return $this->original;
    }

    /**
     * @return string the term with leading and trailing wildcard chars removed
     */
    public function getBase(): string
    {
        return $this->base;
    }

    /**
     * @return string the quoted base, wrapped in '.*' on each side that had a wildcard
     */
    public function getQuoted(): string
    {
        return $this->quoted;
    }

    /**
     * @return int the length of the base term (not counting wildcards)
     */
    public function getLength(): int
    {
        return $this->length;
    }

    /**
     * @return int combination of the WILDCARD_* flags
     */
    public function getWildcard(): int
    {
        return $this->wildcard;
    }

    /**
     * @return array [entity => frequency, ...]
     */
    public function getEntityFrequencies(): array
    {
        return $this->frequencies;
    }

    /**
     * Add found token IDs for a specific index group
     *
     * Any tokens previously stored for the same group are replaced.
     *
     * @param int $group Index group (length for split collections, 0 for non-split)
     * @param array $tokens [tokenID => tokenName, ...]
     * @return void
     * @internal
     */
    public function addTokens(int $group, array $tokens): void
    {
        $this->tokens[$group] = $tokens;
    }

    /**
     * Return all tokens that match the given term
     *
     * The token names of all groups are flattened into a single list, token IDs are dropped.
     *
     * @return string[]
     */
    public function getTokens(): array
    {
        if (empty($this->tokens)) return [];
        return array_merge(...array_map('array_values', array_values($this->tokens)));
    }

    /**
     * Return all token IDs for a specific index group
     *
     * @param int $group Index group (length for split collections, 0 for non-split)
     * @return int[] empty when nothing was stored for the group
     */
    public function getTokenIDsByGroup(int $group): array
    {
        return isset($this->tokens[$group]) ? array_keys($this->tokens[$group]) : [];
    }

    /**
     * Mathematically add the given frequency to existing frequency for the entityID
     *
     * @param int $entityID
     * @param int $frequency
     * @return void
     * @internal
     */
    public function addEntityFrequency(int $entityID, int $frequency): void
    {
        $this->frequencies[$entityID] = ($this->frequencies[$entityID] ?? 0) + $frequency;
    }

    /**
     * Update the entity frequencies to use actual entity names
     *
     * Entity IDs that have no entry in the given map are dropped, because they can not be
     * addressed by name. Should several IDs resolve to the same name, their frequencies are summed.
     *
     * @param array<int, string> $entityMap [entityID => entityName]
     * @return void
     */
    public function resolveEntities(array $entityMap): void
    {
        $resolved = [];
        foreach ($this->frequencies as $eid => $freq) {
            // an unguarded lookup would warn and file every unknown entity under the '' key
            if (!array_key_exists($eid, $entityMap)) continue;

            $name = $entityMap[$eid];
            $resolved[$name] = ($resolved[$name] ?? 0) + $freq;
        }
        $this->frequencies = $resolved;
    }
}