xref: /dokuwiki/_test/tests/Search/Collection/TermTest.php (revision 1148921de6af6909f19cb5b30b698d0f27d7751e)
1<?php
2
3namespace dokuwiki\test\Search\Collection;
4
5use dokuwiki\Search\Collection\Term;
6use dokuwiki\Search\Tokenizer;
7
/**
 * Unit tests for the Term search collection item.
 *
 * The cases below pin down three areas, as visible from the assertions:
 * how a search term is split into original/base/quoted forms and a wildcard
 * flag, how regex metacharacters show up in the quoted form, and how matches
 * recorded through addMatch() are reported back by the various getters.
 */
class TermTest extends \DokuWikiTest
{
    /**
     * A term without wildcards is expected to report identical original,
     * base and quoted forms and the WILDCARD_NONE flag.
     */
    public function testBasicExact()
    {
        $word = 'dokuwiki';
        $exact = new Term($word);

        $this->assertEquals($word, $exact->getOriginal());
        $this->assertEquals($word, $exact->getBase());
        $this->assertEquals($word, $exact->getQuoted());
        $this->assertEquals(8, $exact->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $exact->getWildcard());
    }

    /**
     * A leading '*' is expected to be dropped from the base, rendered as
     * '.*' in the quoted form and flagged as WILDCARD_START.
     */
    public function testBasicLeftWildcard()
    {
        $leading = new Term('*wiki');

        $this->assertEquals('*wiki', $leading->getOriginal());
        $this->assertEquals('wiki', $leading->getBase());
        $this->assertEquals('.*wiki', $leading->getQuoted());
        $this->assertEquals(4, $leading->getLength());
        $this->assertEquals(Term::WILDCARD_START, $leading->getWildcard());
    }

    /**
     * A trailing '*' is expected to be dropped from the base, rendered as
     * '.*' in the quoted form and flagged as WILDCARD_END.
     */
    public function testBasicRightWildcard()
    {
        $trailing = new Term('wiki*');

        $this->assertEquals('wiki*', $trailing->getOriginal());
        $this->assertEquals('wiki', $trailing->getBase());
        $this->assertEquals('wiki.*', $trailing->getQuoted());
        $this->assertEquals(4, $trailing->getLength());
        $this->assertEquals(Term::WILDCARD_END, $trailing->getWildcard());
    }

    /**
     * Wildcards on both ends are expected to yield the sum of the
     * WILDCARD_START and WILDCARD_END flags.
     */
    public function testBasicBothWildcard()
    {
        $surrounded = new Term('*wiki*');

        $this->assertEquals('*wiki*', $surrounded->getOriginal());
        $this->assertEquals('wiki', $surrounded->getBase());
        $this->assertEquals('.*wiki.*', $surrounded->getQuoted());
        $this->assertEquals(4, $surrounded->getLength());

        $bothEnds = Term::WILDCARD_START + Term::WILDCARD_END;
        $this->assertEquals($bothEnds, $surrounded->getWildcard());
    }

    /**
     * An empty string is expected to produce an empty base of length zero.
     */
    public function testEmptyTerm()
    {
        $blank = new Term('');

        $this->assertEquals('', $blank->getOriginal());
        $this->assertEquals('', $blank->getBase());
        $this->assertEquals(0, $blank->getLength());
    }

    /**
     * Adding the same entity/token pair twice is expected to sum the
     * frequencies, while tokens are reported only once.
     */
    public function testAddMatch()
    {
        $collector = new Term('dokuwiki');

        // [entity, token, frequency] - page1 is recorded twice on purpose
        $recorded = [
            ['page1', 'dokuwiki', 7],
            ['page1', 'dokuwiki', 7],
            ['page2', 'dokuwiki', 1],
        ];
        foreach ($recorded as [$entity, $token, $count]) {
            $collector->addMatch($entity, $token, $count);
        }

        $expectedFrequencies = ['page1' => 14, 'page2' => 1];
        $expectedEntityTokens = ['page1' => ['dokuwiki'], 'page2' => ['dokuwiki']];

        $this->assertEquals($expectedFrequencies, $collector->getEntityFrequencies());
        $this->assertEquals(['dokuwiki'], $collector->getTokens());
        $this->assertEquals($expectedEntityTokens, $collector->getEntityTokens());
    }

    /**
     * A two character numeric term is expected to be kept as-is.
     *
     * The original author's intent: numeric terms are allowed even when they
     * are shorter than the minimum word length (that limit is not defined in
     * this file).
     */
    public function testNumericTerm()
    {
        $digits = '42';
        $numeric = new Term($digits);

        $this->assertEquals($digits, $numeric->getOriginal());
        $this->assertEquals($digits, $numeric->getBase());
        $this->assertEquals(2, $numeric->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $numeric->getWildcard());
    }

    /**
     * A literal dot is expected to stay in original and base but to be
     * backslash-escaped in the quoted form.
     */
    public function testSpecialCharactersQuoting()
    {
        $fileName = 'test.doc';
        $dotted = new Term($fileName);

        $this->assertEquals($fileName, $dotted->getOriginal());
        $this->assertEquals($fileName, $dotted->getBase());

        // only the quoted variant carries the escaping backslash
        $this->assertEquals('test\\.doc', $dotted->getQuoted());
    }

    /**
     * A dot followed by a trailing wildcard: the dot is expected to be
     * escaped while the '*' turns into '.*'.
     */
    public function testSpecialCharactersWithWildcard()
    {
        $dottedPrefix = new Term('test.*');

        $this->assertEquals('test.*', $dottedPrefix->getOriginal());
        $this->assertEquals('test.', $dottedPrefix->getBase());

        // escaped dot ('\.') immediately followed by the expanded wildcard ('.*')
        $this->assertEquals('test\\..*', $dottedPrefix->getQuoted());
        $this->assertEquals(Term::WILDCARD_END, $dottedPrefix->getWildcard());
    }

    /**
     * Leading and trailing wildcards are expected to be stripped from the base.
     *
     * NOTE(review): the original comment says only wildcards "(not spaces)"
     * are trimmed, but the input used here contains no whitespace, so that
     * part is not actually exercised by this test.
     */
    public function testWildcardTrimming()
    {
        $wrapped = new Term('*wiki*');

        $this->assertEquals('*wiki*', $wrapped->getOriginal());
        $this->assertEquals('wiki', $wrapped->getBase());
        $this->assertEquals('.*wiki.*', $wrapped->getQuoted());

        $bothEnds = Term::WILDCARD_START + Term::WILDCARD_END;
        $this->assertEquals($bothEnds, $wrapped->getWildcard());
    }

    /**
     * A single character term is expected to be accepted.
     *
     * Per the original author, length filtering is the caller's
     * responsibility rather than something Term enforces.
     */
    public function testShortTerm()
    {
        $single = new Term('a');

        $this->assertEquals('a', $single->getBase());
        $this->assertEquals(1, $single->getLength());
    }

    /**
     * A term made of wildcards only is expected to be accepted, leaving an
     * empty base of length zero.
     */
    public function testOnlyWildcards()
    {
        $stars = new Term('***');

        $this->assertEquals('', $stars->getBase());
        $this->assertEquals(0, $stars->getLength());
    }

    /**
     * Several different tokens matching on the same entity: frequencies are
     * expected to be summed per entity while the per-token detail stays
     * available through getMatches().
     */
    public function testFrequencyAggregationAcrossTokens()
    {
        $infix = new Term('*wiki*');

        // [entity, token, frequency]
        $hits = [
            ['page1', 'wiki', 5],
            ['page1', 'dokuwiki', 3],
            ['page1', 'wikitext', 2],
            ['page2', 'wikipedia', 7],
        ];
        foreach ($hits as [$entity, $token, $count]) {
            $infix->addMatch($entity, $token, $count);
        }

        // per-entity totals: page1 = 5 + 3 + 2
        $perEntity = $infix->getEntityFrequencies();
        $this->assertEquals(10, $perEntity['page1']);
        $this->assertEquals(7, $perEntity['page2']);

        // every distinct token exactly once; sorted so the order does not matter
        $allTokens = $infix->getTokens();
        sort($allTokens);
        $this->assertEquals(['dokuwiki', 'wiki', 'wikipedia', 'wikitext'], $allTokens);

        // tokens grouped by the entity they matched on
        $tokensByEntity = $infix->getEntityTokens();
        $this->assertCount(3, $tokensByEntity['page1']);
        $this->assertEquals(['wikipedia'], $tokensByEntity['page2']);

        // complete entity => token => frequency detail
        $detail = $infix->getMatches();
        $expectedPage1 = ['wiki' => 5, 'dokuwiki' => 3, 'wikitext' => 2];
        $this->assertEquals($expectedPage1, $detail['page1']);
        $this->assertEquals(['wikipedia' => 7], $detail['page2']);
    }

    /**
     * A match recorded with frequency 0 is expected to still be listed for
     * its entity, with a total of 0.
     */
    public function testZeroFrequency()
    {
        $counted = new Term('dokuwiki');

        $perPage = ['page1' => 5, 'page2' => 0, 'page3' => 3];
        foreach ($perPage as $page => $count) {
            $counted->addMatch($page, 'dokuwiki', $count);
        }

        $totals = $counted->getEntityFrequencies();
        $this->assertEquals(5, $totals['page1']);
        $this->assertEquals(0, $totals['page2']);
        $this->assertEquals(3, $totals['page3']);
    }

    /**
     * Without any addMatch() call all result getters are expected to return
     * empty arrays.
     */
    public function testEmptyResults()
    {
        $untouched = new Term('dokuwiki');
        $nothing = [];

        $this->assertEquals($nothing, $untouched->getMatches());
        $this->assertEquals($nothing, $untouched->getEntityFrequencies());
        $this->assertEquals($nothing, $untouched->getEntityTokens());
        $this->assertEquals($nothing, $untouched->getTokens());
    }

    /**
     * Upper and lower case letters are expected to be preserved in both the
     * original and the base form.
     */
    public function testCaseSensitiveBase()
    {
        $mixedCase = 'DokuWiki';
        $cased = new Term($mixedCase);

        $this->assertEquals($mixedCase, $cased->getOriginal());
        $this->assertEquals($mixedCase, $cased->getBase());
    }

    /**
     * A term containing several regex metacharacters: each of '[', ']', '+'
     * and '.' is expected to appear backslash-escaped in the quoted form.
     */
    public function testComplexRegexCharacters()
    {
        $pattern = 'test[0-9]+.txt';
        $regexLike = new Term($pattern);

        $this->assertEquals($pattern, $regexLike->getOriginal());
        $this->assertEquals($pattern, $regexLike->getBase());

        // only presence of the escaped sequences is checked, not the full string
        $quoted = $regexLike->getQuoted();
        foreach (['\\[', '\\]', '\\+', '\\.'] as $escaped) {
            $this->assertStringContainsString($escaped, $quoted);
        }
    }
}
215