xref: /dokuwiki/inc/Search/Collection/Term.php (revision 1db169408cb6f6c0deb6231d57207dd563f36013)
1<?php
2
3namespace dokuwiki\Search\Collection;
4
5use dokuwiki\Search\Exception\SearchException;
6use dokuwiki\Search\Tokenizer;
7
8/**
9 * Represents a term that is searched on a frequency based index
10 *
11 * A term can contain wildcards and thus may refer to various tokens of different lengths.
12 *
13 * @fixme add standalone tests for this class
14 */
class Term
{

    /** No wildcard: the term has to match a token exactly */
    const WILDCARD_NONE = 0;
    /** The term started with a '*': anything may precede the base */
    const WILDCARD_START = 1;
    /** The term ended with a '*': anything may follow the base */
    const WILDCARD_END = 2;

    /** @var string the original term including wildcard chars */
    protected $original;

    /** @var string the term with all leading and trailing '*' characters removed */
    protected $base;

    /** @var string the quoted term to be used in a regular expression ('.*' replaces the wildcards) */
    protected $quoted;

    /** @var int the length of the base term (not counting wildcards) */
    protected $length;

    /** @var int The type of wildcards, a bit combination of the WILDCARD_* constants */
    protected $wildcard = self::WILDCARD_NONE;

    /** @var array The matching tokens for this term [length => [tokenID => tokenName, ...], ...] */
    protected $tokens = [];

    /** @var array The entity frequencies this term matches (aggregated over all tokens) [entity => frequency] */
    protected $frequencies = [];

    /**
     * Parse a search term and its optional leading/trailing wildcards
     *
     * @param string $term the term as given by the user, may start and/or end with '*'
     * @throws SearchException when the base term is empty or shorter than the minimum word length
     */
    public function __construct($term)
    {
        $this->original = $term;
        $this->base = trim($term, '*');
        $this->length = Tokenizer::tokenLength($this->base);

        // build the regular expression fragment, wildcards become '.*'
        $this->quoted = preg_quote_cb($this->base);
        $this->wildcard = self::WILDCARD_NONE;

        if (substr($term, 0, 1) === '*') {
            $this->quoted = '.*' . $this->quoted;
            $this->wildcard |= self::WILDCARD_START;
        }

        if (substr($term, -1, 1) === '*') {
            $this->quoted .= '.*';
            $this->wildcard |= self::WILDCARD_END;
        }

        // ignore terms that are too short, with an exception on numbers
        // the numeric check uses the original term, so a term carrying a wildcard never qualifies
        $tooShort = $this->length < Tokenizer::getMinWordLength() && !is_numeric($term);
        if ($this->length === 0 || $tooShort) {
            throw new SearchException('Too short term');
        }
    }

    /**
     * @return string the term exactly as passed to the constructor
     */
    public function getOriginal()
    {
        return $this->original;
    }

    /**
     * @return string the term without leading and trailing wildcard chars
     */
    public function getBase()
    {
        return $this->base;
    }

    /**
     * @return string the regular expression fragment for this term
     */
    public function getQuoted()
    {
        return $this->quoted;
    }

    /**
     * @return int the length of the base term
     */
    public function getLength()
    {
        return $this->length;
    }

    /**
     * @return int bit combination of the WILDCARD_* constants
     */
    public function getWildcard()
    {
        return $this->wildcard;
    }

    /**
     * Return the frequencies collected so far
     *
     * The keys are entity IDs until resolveEntities() has been called, entity names afterwards.
     *
     * @return array [entity => frequency, ...], empty when nothing has been added yet
     */
    public function getEntityFrequencies()
    {
        return $this->frequencies;
    }

    /**
     * Set the found tokens of a specific length
     *
     * Tokens previously stored for the same length are replaced.
     *
     * @param int $length
     * @param array $tokens [tokenID => tokenName, ...]
     * @return void
     * @internal
     */
    public function addTokens($length, $tokens)
    {
        $this->tokens[$length] = [];
        foreach ($tokens as $tokenID => $tokenName) {
            $this->tokens[$length][$tokenID] = $tokenName;
        }
    }

    /**
     * Return the names of all tokens that match the given term
     *
     * @return string[] token names of all lengths as a flat list, empty when no tokens were added
     */
    public function getTokens()
    {
        // array_merge() needs at least one argument on older PHP versions
        if ($this->tokens === []) {
            return [];
        }

        // array_values() drops the length and token ID keys so nothing is passed as named argument
        return array_merge(...array_map('array_values', array_values($this->tokens)));
    }

    /**
     * Return all token IDs of the given length
     *
     * @param int $length
     * @return int[] empty when no tokens of that length are known
     */
    public function getTokenIDsByLength($length)
    {
        return isset($this->tokens[$length]) ? array_keys($this->tokens[$length]) : [];
    }

    /**
     * Mathematically add the given frequency to existing frequency for the entityID
     *
     * @param int $entityID
     * @param int $frequency
     * @return void
     * @internal
     */
    public function addEntityFrequency($entityID, $frequency)
    {
        if (!isset($this->frequencies[$entityID])) {
            $this->frequencies[$entityID] = 0;
        }

        $this->frequencies[$entityID] += $frequency;
    }

    /**
     * Update the entity frequencies to use actual entity names
     *
     * Entity IDs that have no entry in the given map can not be resolved and are dropped.
     *
     * @param array $entityMap [entityID => entityName]
     * @return void
     */
    public function resolveEntities($entityMap)
    {
        $resolved = [];
        foreach ($this->frequencies as $eid => $freq) {
            if (!isset($entityMap[$eid])) {
                // no name known for this ID, avoid filing it under an empty key
                continue;
            }
            $resolved[$entityMap[$eid]] = $freq;
        }
        $this->frequencies = $resolved;
    }
}
187