xref: /dokuwiki/inc/Search/Collection/Term.php (revision e05998d5d6388950e9732477c1bca8f3aff6f193)
1<?php
2
3namespace dokuwiki\Search\Collection;
4
5use dokuwiki\Search\Exception\SearchException;
6use dokuwiki\Search\Tokenizer;
7
8/**
9 * Represents a term that is searched on a frequency based index
10 *
11 * A term can contain wildcards and thus may refer to various tokens of different lengths.
12 */
class Term
{

    /** Wildcard flag: the term has no wildcards */
    const WILDCARD_NONE = 0;
    /** Wildcard flag: the term starts with a wildcard (`*foo`) */
    const WILDCARD_START = 1;
    /** Wildcard flag: the term ends with a wildcard (`foo*`); combined with WILDCARD_START for `*foo*` */
    const WILDCARD_END = 2;

    /** @var string the original term including wildcard chars */
    protected $original;

    /** @var string the term with all leading and trailing wildcard chars stripped */
    protected $base;

    /** @var string the quoted term to be used in a regular expression (wildcards become `.*`) */
    protected $quoted;

    /** @var int the length of the base term (not counting wildcards) */
    protected $length;

    /** @var int The type of wildcards, a combination of the WILDCARD_* flags */
    protected $wildcard;

    /** @var array The matching tokens for this term [length => [tokenID => tokenName, ...], ...] */
    protected $tokens = [];

    /** @var array The entity frequencies this term matches (aggregated over all tokens) [entity => frequency] */
    protected $frequencies = [];

    /**
     * Parse a search term into its base, wildcard flags and regular expression form
     *
     * @param string $term the term as entered, may start and/or end with a `*` wildcard
     * @throws SearchException when the base term is empty, or shorter than the minimum
     *                         word length while the original term is not numeric
     */
    public function __construct($term)
    {
        $this->original = $term;
        $this->base = trim($term, '*');
        $this->quoted = preg_quote_cb($this->base);
        $this->wildcard = self::WILDCARD_NONE;
        $this->length = Tokenizer::tokenLength($this->base);

        // a leading wildcard matches any prefix
        if (substr($term, 0, 1) === '*') {
            $this->quoted = '.*' . $this->quoted;
            $this->wildcard |= self::WILDCARD_START;
        }

        // a trailing wildcard matches any suffix
        if (substr($term, -1, 1) === '*') {
            $this->quoted .= '.*';
            $this->wildcard |= self::WILDCARD_END;
        }

        // ignore terms that are too short, with an exception on numbers
        if ($this->length === 0 || ($this->length < Tokenizer::getMinWordLength() && !is_numeric($term))) {
            throw new SearchException('Too short term');
        }
    }

    /**
     * @return string the term exactly as passed to the constructor
     */
    public function getOriginal()
    {
        return $this->original;
    }

    /**
     * @return string the term without leading/trailing wildcard chars
     */
    public function getBase()
    {
        return $this->base;
    }

    /**
     * @return string the regular expression fragment for this term
     */
    public function getQuoted()
    {
        return $this->quoted;
    }

    /**
     * @return int the length of the base term
     */
    public function getLength()
    {
        return $this->length;
    }

    /**
     * @return int combination of the WILDCARD_* flags
     */
    public function getWildcard()
    {
        return $this->wildcard;
    }

    /**
     * Return the aggregated frequencies collected so far
     *
     * The keys are entity IDs until resolveEntities() has been called, entity names afterwards.
     *
     * @return array [entity => frequency, ...], empty when nothing has been added
     */
    public function getEntityFrequencies()
    {
        return $this->frequencies;
    }

    /**
     * Add found tokens IDs of a specific length
     *
     * Tokens previously stored for the same length are replaced.
     *
     * @param int $length
     * @param array $tokens [tokenID => tokenName, ...]
     * @return void
     * @internal
     */
    public function addTokens($length, $tokens)
    {
        $this->tokens[$length] = [];
        foreach ($tokens as $tokenID => $tokenName) {
            $this->tokens[$length][$tokenID] = $tokenName;
        }
    }

    /**
     * Return all tokens that match the given term
     *
     * @return string[] token names of all lengths as a flat list, empty when no tokens were added
     */
    public function getTokens()
    {
        $lists = array_map('array_values', array_values($this->tokens));

        // the leading empty array keeps array_merge() valid when there are no token lists at all
        return array_merge([], ...$lists);
    }

    /**
     * Return all token IDs of the given length
     *
     * @param int $length
     * @return int[] empty when no tokens of that length are known
     */
    public function getTokenIDsByLength($length)
    {
        return isset($this->tokens[$length]) ? array_keys($this->tokens[$length]) : [];
    }

    /**
     * Mathematically add the given frequency to existing frequency for the entityID
     *
     * @param int $entityID
     * @param int $frequency
     * @return void
     * @internal
     */
    public function addEntityFrequency($entityID, $frequency)
    {
        if (!isset($this->frequencies[$entityID])) {
            $this->frequencies[$entityID] = 0;
        }

        $this->frequencies[$entityID] += $frequency;
    }

    /**
     * Update the entity frequencies to use actual entity names
     *
     * Entity IDs that have no entry in the given map are dropped, because they can not be
     * reported under a meaningful name.
     *
     * @param array $entityMap [entityID => entityName]
     * @return void
     */
    public function resolveEntities($entityMap)
    {
        $resolved = [];
        foreach ($this->frequencies as $eid => $freq) {
            if (!isset($entityMap[$eid])) {
                continue;
            }
            $resolved[$entityMap[$eid]] = $freq;
        }
        $this->frequencies = $resolved;
    }
}
185