<?php

namespace dokuwiki\Search\Collection;

use dokuwiki\Search\Exception\SearchException;
use dokuwiki\Search\Tokenizer;

/**
 * Represents a term that is searched on a frequency based index
 *
 * A term can contain wildcards and thus may refer to various tokens of different lengths.
 * The matching tokens and the aggregated entity frequencies are filled in from the outside
 * through addTokens() and addEntityFrequency().
 *
 * @fixme add standalone tests for this class
 */
class Term
{

    /** No wildcard given, the term has to match a token exactly */
    const WILDCARD_NONE = 0;
    /** The term started with a '*' */
    const WILDCARD_START = 1;
    /** The term ended with a '*' (combined with WILDCARD_START this results in 3) */
    const WILDCARD_END = 2;

    /** @var string the original term including wildcard chars */
    protected $original;

    /** @var string the term with all leading and trailing wildcard chars stripped FIXME */
    protected $base;

    /** @var string the quoted base to be used in a regular expression, wildcards are expressed as '.*' */
    protected $quoted;

    /** @var int the length of the base term (not counting wildcards) as reported by the Tokenizer */
    protected $length;

    /** @var int The type of wildcards, a combination of the WILDCARD_* constants */
    protected $wildcard;

    /** @var array The matching tokens for this term [length => [tokenID => tokenName, ...], ...] */
    protected $tokens = [];

    /** @var array The entity frequencies this term matches (aggregated over all tokens) [entity => frequency] */
    protected $frequencies = [];

    /**
     * Analyze the given term for wildcards and prepare its regular expression form
     *
     * @param string $term the search term, may start and/or end with a '*' wildcard
     * @throws SearchException when the base term is empty, or shorter than the minimum word length
     *                         while the given term is not numeric
     */
    public function __construct($term)
    {
        $this->original = $term;
        $this->base = trim($term, '*');
        $this->quoted = preg_quote_cb($this->base);
        $this->wildcard = self::WILDCARD_NONE;
        $this->length = Tokenizer::tokenLength($this->base);

        // handle wildcards: each one is recorded as a flag and widens the expression
        if (substr($term, 0, 1) === '*') {
            $this->quoted = '.*' . $this->quoted;
            $this->wildcard |= self::WILDCARD_START;
        }

        if (substr($term, -1, 1) === '*') {
            $this->quoted = $this->quoted . '.*';
            $this->wildcard |= self::WILDCARD_END;
        }

        // ignore terms that are too short, with an exception on numbers
        if ($this->length === 0 || ($this->length < Tokenizer::getMinWordLength() && !is_numeric($term))) {
            throw new SearchException('Too short term');
        }
    }

    /**
     * @return string the term exactly as it was passed to the constructor
     */
    public function getOriginal()
    {
        return $this->original;
    }

    /**
     * @return string the term without leading and trailing wildcard chars
     */
    public function getBase()
    {
        return $this->base;
    }

    /**
     * @return string the regular expression fragment for this term
     */
    public function getQuoted()
    {
        return $this->quoted;
    }

    /**
     * @return int the length of the base term
     */
    public function getLength()
    {
        return $this->length;
    }

    /**
     * @return int combination of the WILDCARD_* constants
     */
    public function getWildcard()
    {
        return $this->wildcard;
    }

    /**
     * Keys are entity IDs until resolveEntities() has been called, entity names afterwards
     *
     * @return array [entity => frequency, ...], empty when nothing has been added yet
     */
    public function getEntityFrequencies()
    {
        return $this->frequencies;
    }

    /**
     * Add found tokens IDs of a specific length
     *
     * Tokens that were stored for the same length before are replaced.
     *
     * @param int $length
     * @param array $tokens [tokenID => tokenName, ...]
     * @return void
     * @internal
     */
    public function addTokens($length, $tokens)
    {
        $this->tokens[$length] = [];
        foreach ($tokens as $tokenID => $tokenName) {
            $this->tokens[$length][$tokenID] = $tokenName;
        }
    }

    /**
     * Return all tokens that match the given term
     *
     * The token names of all lengths are combined into one flat list, token IDs are not preserved.
     *
     * @return string[] empty when no tokens have been added
     */
    public function getTokens()
    {
        // array_merge() needs at least one argument before PHP 7.4, so never spread an empty list
        if (!$this->tokens) {
            return [];
        }

        // array_values() on both levels ensures that neither lengths nor token IDs end up as keys
        return array_merge(...array_map('array_values', array_values($this->tokens)));
    }

    /**
     * Return all token IDs of the given length
     *
     * @param int $length
     * @return int[] empty when no tokens of that length are known
     */
    public function getTokenIDsByLength($length)
    {
        return isset($this->tokens[$length]) ? array_keys($this->tokens[$length]) : [];
    }

    /**
     * Mathematically add the given frequency to existing frequency for the entityID
     *
     * @param int $entityID
     * @param int $frequency
     * @return void
     * @internal
     */
    public function addEntityFrequency($entityID, $frequency)
    {
        if (!isset($this->frequencies[$entityID])) {
            $this->frequencies[$entityID] = 0;
        }

        $this->frequencies[$entityID] += $frequency;
    }

    /**
     * Update the entity frequencies to use actual entity names
     *
     * Entity IDs that have no entry in the given map can not be resolved and are dropped.
     *
     * @param array $entityMap [entityID => entityName]
     * @return void
     */
    public function resolveEntities($entityMap)
    {
        $resolved = [];
        foreach ($this->frequencies as $eid => $freq) {
            if (!isset($entityMap[$eid])) {
                // an unknown entity would otherwise be stored under an empty name
                continue;
            }
            $resolved[$entityMap[$eid]] = $freq;
        }
        $this->frequencies = $resolved;
    }
}