xref: /dokuwiki/_test/tests/Search/Collection/TermTest.php (revision 1148921de6af6909f19cb5b30b698d0f27d7751e)
1ede46466SAndreas Gohr<?php
2ede46466SAndreas Gohr
3ede46466SAndreas Gohrnamespace dokuwiki\test\Search\Collection;
4ede46466SAndreas Gohr
5ede46466SAndreas Gohruse dokuwiki\Search\Collection\Term;
6ede46466SAndreas Gohruse dokuwiki\Search\Tokenizer;
7ede46466SAndreas Gohr
/**
 * Tests for the search Term collection item.
 *
 * The first group of tests checks how a raw term string is exposed through
 * getOriginal(), getBase(), getQuoted(), getLength() and getWildcard().
 * The second group checks what the match accessors report after addMatch() calls.
 */
class TermTest extends \DokuWikiTest
{
    /**
     * A plain word without wildcards is reported unchanged by every accessor.
     */
    public function testBasicExact()
    {
        $word = 'dokuwiki';
        $exact = new Term($word);

        $this->assertEquals($word, $exact->getOriginal());
        $this->assertEquals($word, $exact->getBase());
        $this->assertEquals($word, $exact->getQuoted());
        $this->assertEquals(8, $exact->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $exact->getWildcard());
    }

    /**
     * A leading asterisk is dropped from the base, becomes ".*" in the quoted
     * form and is reported as WILDCARD_START.
     */
    public function testBasicLeftWildcard()
    {
        $leading = new Term('*wiki');

        $this->assertEquals('*wiki', $leading->getOriginal());
        $this->assertEquals('wiki', $leading->getBase());
        $this->assertEquals('.*wiki', $leading->getQuoted());
        $this->assertEquals(4, $leading->getLength());
        $this->assertEquals(Term::WILDCARD_START, $leading->getWildcard());
    }

    /**
     * A trailing asterisk is dropped from the base, becomes ".*" in the quoted
     * form and is reported as WILDCARD_END.
     */
    public function testBasicRightWildcard()
    {
        $trailing = new Term('wiki*');

        $this->assertEquals('wiki*', $trailing->getOriginal());
        $this->assertEquals('wiki', $trailing->getBase());
        $this->assertEquals('wiki.*', $trailing->getQuoted());
        $this->assertEquals(4, $trailing->getLength());
        $this->assertEquals(Term::WILDCARD_END, $trailing->getWildcard());
    }

    /**
     * Asterisks on both sides are expected to yield the sum of both wildcard constants.
     */
    public function testBasicBothWildcard()
    {
        $expectedWildcard = Term::WILDCARD_START + Term::WILDCARD_END;
        $surrounded = new Term('*wiki*');

        $this->assertEquals('*wiki*', $surrounded->getOriginal());
        $this->assertEquals('wiki', $surrounded->getBase());
        $this->assertEquals('.*wiki.*', $surrounded->getQuoted());
        $this->assertEquals(4, $surrounded->getLength());
        $this->assertEquals($expectedWildcard, $surrounded->getWildcard());
    }

    /**
     * An empty string can be wrapped in a Term; original and base stay empty
     * and the length is zero.
     */
    public function testEmptyTerm()
    {
        $blank = new Term('');

        $this->assertEquals('', $blank->getOriginal());
        $this->assertEquals('', $blank->getBase());
        $this->assertEquals(0, $blank->getLength());
    }

    /**
     * Adding the same entity/token pair twice sums the frequencies, while the
     * token itself is listed only once globally and once per entity.
     */
    public function testAddMatch()
    {
        $collector = new Term('dokuwiki');

        // [entity, token, frequency]; page1 is deliberately recorded twice
        $recorded = [
            ['page1', 'dokuwiki', 7],
            ['page1', 'dokuwiki', 7],
            ['page2', 'dokuwiki', 1],
        ];
        foreach ($recorded as [$entity, $token, $frequency]) {
            $collector->addMatch($entity, $token, $frequency);
        }

        $this->assertEquals(['page1' => 14, 'page2' => 1], $collector->getEntityFrequencies());
        $this->assertEquals(['dokuwiki'], $collector->getTokens());
        $this->assertEquals(
            ['page1' => ['dokuwiki'], 'page2' => ['dokuwiki']],
            $collector->getEntityTokens()
        );
    }

    /**
     * A two-digit numeric term is accepted as-is and carries no wildcard flag.
     */
    public function testNumericTerm()
    {
        // numbers must work even though they may be shorter than a minimum word length
        $number = new Term('42');

        $this->assertEquals('42', $number->getOriginal());
        $this->assertEquals('42', $number->getBase());
        $this->assertEquals(2, $number->getLength());
        $this->assertEquals(Term::WILDCARD_NONE, $number->getWildcard());
    }

    /**
     * A regex metacharacter inside the term is left alone in original/base but
     * backslash-escaped in the quoted form.
     */
    public function testSpecialCharactersQuoting()
    {
        $raw = 'test.doc';
        $dotted = new Term($raw);

        $this->assertEquals($raw, $dotted->getOriginal());
        $this->assertEquals($raw, $dotted->getBase());
        // only the quoted variant carries the escaped dot
        $this->assertEquals('test\\.doc', $dotted->getQuoted());
    }

    /**
     * A literal dot followed by a trailing wildcard: the dot stays in the base and
     * is escaped in the quoted form, the asterisk turns into ".*".
     */
    public function testSpecialCharactersWithWildcard()
    {
        $dotThenStar = new Term('test.*');

        $this->assertEquals('test.*', $dotThenStar->getOriginal());
        $this->assertEquals('test.', $dotThenStar->getBase());
        // escaped dot ("\.") directly followed by the translated wildcard (".*")
        $this->assertEquals('test\\..*', $dotThenStar->getQuoted());
        $this->assertEquals(Term::WILDCARD_END, $dotThenStar->getWildcard());
    }

    /**
     * Surrounding wildcards are stripped from the base.
     *
     * NOTE(review): the intent is to show that only wildcards, not spaces, are
     * trimmed, but the input used here contains no spaces - confirm whether a
     * whitespace case should be added.
     */
    public function testWildcardTrimming()
    {
        $expectedWildcard = Term::WILDCARD_START + Term::WILDCARD_END;
        $wrapped = new Term('*wiki*');

        $this->assertEquals('*wiki*', $wrapped->getOriginal());
        $this->assertEquals('wiki', $wrapped->getBase());
        $this->assertEquals('.*wiki.*', $wrapped->getQuoted());
        $this->assertEquals($expectedWildcard, $wrapped->getWildcard());
    }

    /**
     * A single-character term is accepted; filtering by length is left to the caller.
     */
    public function testShortTerm()
    {
        $single = new Term('a');

        $this->assertEquals('a', $single->getBase());
        $this->assertEquals(1, $single->getLength());
    }

    /**
     * A term made up solely of asterisks is accepted and ends up with an empty base.
     */
    public function testOnlyWildcards()
    {
        $stars = new Term('***');

        $this->assertEquals('', $stars->getBase());
        $this->assertEquals(0, $stars->getLength());
    }

    /**
     * Several different tokens matching on the same entity: frequencies are summed
     * per entity while the individual tokens remain available through
     * getTokens(), getEntityTokens() and getMatches().
     */
    public function testFrequencyAggregationAcrossTokens()
    {
        $wildcardTerm = new Term('*wiki*');

        // [entity, token, frequency]
        $recorded = [
            ['page1', 'wiki', 5],
            ['page1', 'dokuwiki', 3],
            ['page1', 'wikitext', 2],
            ['page2', 'wikipedia', 7],
        ];
        foreach ($recorded as [$entity, $token, $frequency]) {
            $wildcardTerm->addMatch($entity, $token, $frequency);
        }

        // per-entity totals
        $perEntity = $wildcardTerm->getEntityFrequencies();
        $this->assertEquals(10, $perEntity['page1']); // 5 + 3 + 2
        $this->assertEquals(7, $perEntity['page2']);

        // every distinct token, compared in sorted order
        $allTokens = $wildcardTerm->getTokens();
        sort($allTokens);
        $this->assertEquals(['dokuwiki', 'wiki', 'wikipedia', 'wikitext'], $allTokens);

        // tokens grouped by entity
        $tokensByEntity = $wildcardTerm->getEntityTokens();
        $this->assertCount(3, $tokensByEntity['page1']);
        $this->assertEquals(['wikipedia'], $tokensByEntity['page2']);

        // complete entity => token => frequency detail
        $detail = $wildcardTerm->getMatches();
        $this->assertEquals(['wiki' => 5, 'dokuwiki' => 3, 'wikitext' => 2], $detail['page1']);
        $this->assertEquals(['wikipedia' => 7], $detail['page2']);
    }

    /**
     * A match recorded with frequency 0 still shows up for its entity, with a total of 0.
     */
    public function testZeroFrequency()
    {
        $collector = new Term('dokuwiki');

        $collector->addMatch('page1', 'dokuwiki', 5);
        $collector->addMatch('page2', 'dokuwiki', 0);
        $collector->addMatch('page3', 'dokuwiki', 3);

        $perEntity = $collector->getEntityFrequencies();

        $expectedTotals = ['page1' => 5, 'page2' => 0, 'page3' => 3];
        foreach ($expectedTotals as $entity => $total) {
            $this->assertEquals($total, $perEntity[$entity]);
        }
    }

    /**
     * Without any addMatch() call all match accessors return empty arrays.
     */
    public function testEmptyResults()
    {
        $nothing = [];
        $untouched = new Term('dokuwiki');

        $this->assertEquals($nothing, $untouched->getMatches());
        $this->assertEquals($nothing, $untouched->getEntityFrequencies());
        $this->assertEquals($nothing, $untouched->getEntityTokens());
        $this->assertEquals($nothing, $untouched->getTokens());
    }

    /**
     * Upper/lower case of the input is preserved in both original and base.
     */
    public function testCaseSensitiveBase()
    {
        $mixedCase = 'DokuWiki';
        $cased = new Term($mixedCase);

        $this->assertEquals($mixedCase, $cased->getOriginal());
        $this->assertEquals($mixedCase, $cased->getBase());
    }

    /**
     * Several regex metacharacters in one term: original and base are untouched,
     * the quoted form contains an escaped variant of each of them.
     */
    public function testComplexRegexCharacters()
    {
        $raw = 'test[0-9]+.txt';
        $pattern = new Term($raw);

        $this->assertEquals($raw, $pattern->getOriginal());
        $this->assertEquals($raw, $pattern->getBase());

        // each of these escaped sequences has to appear somewhere in the quoted form
        $quoted = $pattern->getQuoted();
        foreach (['\\[', '\\]', '\\+', '\\.'] as $escaped) {
            $this->assertStringContainsString($escaped, $quoted);
        }
    }

}
215