<?php

namespace dokuwiki\Search\Collection;

use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Search\Tokenizer;

/**
 * Represents a term that is searched on a frequency based index
 *
 * A term can contain wildcards and thus may refer to various tokens of different lengths.
 * The matching tokens and the aggregated entity frequencies are not looked up by this
 * class itself; they are fed in through addTokens() and addEntityFrequency().
 */
class Term
{
    /** No wildcard at either end of the term */
    const WILDCARD_NONE = 0;
    /** The term starts with a wildcard ("*foo") */
    const WILDCARD_START = 1;
    /** The term ends with a wildcard ("foo*") */
    const WILDCARD_END = 2;

    /** @var string the original term including wildcard chars */
    protected $original;

    /** @var string the term with all leading and trailing wildcard chars (*) trimmed off */
    protected $base;

    /** @var string the quoted term to be used in a regular expression (wildcards become ".*") */
    protected $quoted;

    /** @var int the length of the base term (not counting wildcards) */
    protected $length;

    /** @var int The type of wildcards, a combination of the WILDCARD_* constants */
    protected $wildcard;

    /** @var array The matching tokens for this term [length => [tokenID => tokenName, ...], ...] */
    protected $tokens = [];

    /** @var array The entity frequencies this term matches (aggregated over all tokens) [entity => frequency] */
    protected $frequencies = [];

    /**
     * Parse the given search term into its base, wildcard type and regular expression form
     *
     * @param string $term the search term, optionally starting and/or ending with a "*" wildcard
     * @throws SearchException when the base term is empty, or shorter than the minimum
     *                         word length while the given term is not numeric
     */
    public function __construct($term)
    {
        $this->original = $term;
        $this->base = trim($term, '*');
        $this->quoted = preg_quote_cb($this->base);
        $this->wildcard = self::WILDCARD_NONE;
        $this->length = Tokenizer::tokenLength($this->base);

        // a leading wildcard matches any prefix
        if (substr($term, 0, 1) === '*') {
            $this->quoted = '.*' . $this->quoted;
            $this->wildcard |= self::WILDCARD_START;
        }

        // a trailing wildcard matches any suffix
        if (substr($term, -1, 1) === '*') {
            $this->quoted .= '.*';
            $this->wildcard |= self::WILDCARD_END;
        }

        // ignore terms that are too short, with an exception on numbers
        // NOTE(review): is_numeric() is applied to the original term, so a short numeric
        // term with a wildcard (e.g. "1*") is still rejected - confirm this is intended
        if ($this->length === 0 || ($this->length < Tokenizer::getMinWordLength() && !is_numeric($term))) {
            throw new SearchException('Too short term');
        }
    }

    /**
     * @return string the term exactly as passed to the constructor
     */
    public function getOriginal()
    {
        return $this->original;
    }

    /**
     * @return string the term without leading and trailing wildcard chars
     */
    public function getBase()
    {
        return $this->base;
    }

    /**
     * @return string the regular expression fragment for this term
     */
    public function getQuoted()
    {
        return $this->quoted;
    }

    /**
     * @return int the length of the base term
     */
    public function getLength()
    {
        return $this->length;
    }

    /**
     * @return int combination of the WILDCARD_* constants
     */
    public function getWildcard()
    {
        return $this->wildcard;
    }

    /**
     * @return array [entity => frequency, ...] (empty when no frequency has been added yet)
     */
    public function getEntityFrequencies()
    {
        return $this->frequencies;
    }

    /**
     * Add found tokens IDs of a specific length
     *
     * Tokens previously stored for the same length are replaced.
     *
     * @param int $length
     * @param array $tokens [tokenID => tokenName, ...]
     * @return void
     * @internal
     */
    public function addTokens($length, $tokens)
    {
        $this->tokens[$length] = [];
        foreach ($tokens as $tokenID => $tokenName) {
            $this->tokens[$length][$tokenID] = $tokenName;
        }
    }

    /**
     * Return all tokens that match the given term
     *
     * The token names of all lengths are flattened into a single list; token IDs are dropped.
     *
     * @return string[]
     */
    public function getTokens()
    {
        $lists = array_map('array_values', array_values($this->tokens));

        // the leading [] keeps the call valid when no tokens have been added at all
        return array_merge([], ...$lists);
    }

    /**
     * Return all token IDs of the given length
     *
     * @param int $length
     * @return int[] empty when no tokens of that length are known
     */
    public function getTokenIDsByLength($length)
    {
        return isset($this->tokens[$length]) ? array_keys($this->tokens[$length]) : [];
    }

    /**
     * Mathematically add the given frequency to existing frequency for the entityID
     *
     * @param int $entityID
     * @param int $frequency
     * @return void
     * @internal
     */
    public function addEntityFrequency($entityID, $frequency)
    {
        if (!isset($this->frequencies[$entityID])) {
            $this->frequencies[$entityID] = 0;
        }

        $this->frequencies[$entityID] += $frequency;
    }

    /**
     * Update the entity frequencies to use actual entity names
     *
     * Replaces the keys of the frequency list with the names found in the given map.
     *
     * @param array $entityMap [entityID => entityName]
     * @return void
     */
    public function resolveEntities($entityMap)
    {
        $resolved = [];
        foreach ($this->frequencies as $eid => $freq) {
            $name = $entityMap[$eid];
            $resolved[$name] = $freq;
        }
        $this->frequencies = $resolved;
    }
}