<?php

namespace dokuwiki\Search\Collection;

use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Search\Tokenizer;

/**
 * Represents a term that is searched on a frequency based index
 *
 * A term can contain wildcards and thus may refer to various tokens of different lengths.
 *
 * @fixme add standalone tests for this class
 */
class Term
{
    /** No wildcard given: the term has to match a token exactly */
    const WILDCARD_NONE = 0;

    /** The term started with a wildcard char (bit flag, may be combined with WILDCARD_END) */
    const WILDCARD_START = 1;

    /** The term ended with a wildcard char (bit flag, may be combined with WILDCARD_START) */
    const WILDCARD_END = 2;

    /** @var string the original term including wildcard chars */
    protected $original;

    /** @var string the base of the term without wildcard chars FIXME */
    protected $base;

    /** @var string the quoted term to be used in a regular expression */
    protected $quoted;

    /** @var int the length of the base term (not counting wildcards) */
    protected $length;

    /** @var int The type of wildcards (combination of the WILDCARD_* flags) */
    protected $wildcard;

    /** @var array The matching tokens for this term [length => [tokenID => tokenName, ...], ...] */
    protected $tokens = [];

    /** @var array The entity frequencies this term matches (aggregated over all tokens) [entity => frequency] */
    protected $frequencies = [];

    /**
     * Parse the given search term
     *
     * Leading and trailing asterisks are treated as wildcards. They are stripped from the
     * base term and translated to '.*' in the quoted (regular expression) form.
     *
     * @param string $term the term as entered, optionally with leading/trailing '*'
     * @throws SearchException when the base term is empty or shorter than the minimum word length
     */
    public function __construct($term)
    {
        $this->original = $term;
        $this->base = trim($term, '*');
        $this->quoted = preg_quote_cb($this->base);
        $this->wildcard = self::WILDCARD_NONE;
        $this->length = Tokenizer::tokenLength($this->base);

        // handle wildcards; the flags are distinct bits, so both may be set at once
        if (substr($term, 0, 1) === '*') {
            $this->quoted = '.*' . $this->quoted;
            $this->wildcard |= self::WILDCARD_START;
        }

        if (substr($term, -1, 1) === '*') {
            $this->quoted .= '.*';
            $this->wildcard |= self::WILDCARD_END;
        }

        // ignore terms that are too short, with an exception on numbers
        $tooShort = $this->length < Tokenizer::getMinWordLength() && !is_numeric($term);
        if ($this->length === 0 || $tooShort) {
            throw new SearchException('Too short term');
        }
    }

    /**
     * @return string the term exactly as passed to the constructor
     */
    public function getOriginal()
    {
        return $this->original;
    }

    /**
     * @return string the term with leading and trailing wildcard chars removed
     */
    public function getBase()
    {
        return $this->base;
    }

    /**
     * @return string the base term quoted for use in a regular expression, wildcards given as '.*'
     */
    public function getQuoted()
    {
        return $this->quoted;
    }

    /**
     * @return int the token length of the base term
     */
    public function getLength()
    {
        return $this->length;
    }

    /**
     * @return int combination of the WILDCARD_* flags
     */
    public function getWildcard()
    {
        return $this->wildcard;
    }

    /**
     * @return array [entity => frequency, ...] (empty when nothing has been added yet)
     */
    public function getEntityFrequencies()
    {
        return $this->frequencies;
    }

    /**
     * Add found tokens IDs of a specific length
     *
     * Tokens previously stored for the same length are replaced.
     *
     * @param int $length
     * @param array $tokens [tokenID => tokenName, ...]
     * @return void
     * @internal
     */
    public function addTokens($length, $tokens)
    {
        $this->tokens[$length] = [];
        foreach ($tokens as $tokenID => $tokenName) {
            $this->tokens[$length][$tokenID] = $tokenName;
        }
    }

    /**
     * Return all tokens that match the given term
     *
     * The token names of all lengths are flattened into a single list, token IDs are dropped.
     *
     * @return string[] list of token names, empty when no tokens have been added
     */
    public function getTokens()
    {
        // a plain loop instead of array_merge(...$lists): spreading an empty list would call
        // array_merge() without arguments, which older PHP versions reject
        $names = [];
        foreach ($this->tokens as $tokensOfLength) {
            foreach ($tokensOfLength as $tokenName) {
                $names[] = $tokenName;
            }
        }
        return $names;
    }

    /**
     * Return all token IDs of the given length
     *
     * @param int $length
     * @return int[]
     */
    public function getTokenIDsByLength($length)
    {
        return isset($this->tokens[$length]) ? array_keys($this->tokens[$length]) : [];
    }

    /**
     * Mathematically add the given frequency to existing frequency for the entityID
     *
     * @param int $entityID
     * @param int $frequency
     * @return void
     * @internal
     */
    public function addEntityFrequency($entityID, $frequency)
    {
        if (!isset($this->frequencies[$entityID])) {
            $this->frequencies[$entityID] = 0;
        }

        $this->frequencies[$entityID] += $frequency;
    }

    /**
     * Update the entity frequencies to use actual entity names
     *
     * Entity IDs that have no entry in the given map can not be resolved and are dropped.
     *
     * @param array $entityMap [entityID => entityName]
     * @return void
     */
    public function resolveEntities($entityMap)
    {
        $resolved = [];
        foreach ($this->frequencies as $eid => $freq) {
            if (!isset($entityMap[$eid])) {
                // no name known for this ID; skip it instead of filing it under an empty key
                continue;
            }
            $resolved[$entityMap[$eid]] = $freq;
        }
        $this->frequencies = $resolved;
    }
}