<?php

namespace dokuwiki\test\Search\Query;

use dokuwiki\Search\Collection\Term;
use dokuwiki\Search\Query\QueryEvaluator;

/**
 * Tests for the QueryEvaluator class
 *
 * These tests verify RPN evaluation with typed stack entries (page sets,
 * namespace predicates, negated wrappers) independent of actual index data.
 *
 * Every test hands the evaluator a list of RPN tokens plus a map of
 * word => Term and compares the returned [pageName => score] array.
 * Tokens used here are words ("W+:word", "W-:word"), namespaces ("N+:ns")
 * and the operators "AND", "OR" and "NOT".
 */
class QueryEvaluatorTest extends \DokuWikiTest
{
    /**
     * Create a Term with pre-resolved entity frequencies
     *
     * Each page gets a sequential numeric ID (starting at 0) which is
     * registered on the term via addEntityFrequency(); the IDs are then
     * mapped back to the page names through resolveEntities().
     *
     * @param string $word the word this term represents
     * @param array $frequencies [pageName => frequency]
     * @return Term
     */
    protected function makeTerm(string $word, array $frequencies): Term
    {
        $term = new Term($word);
        // Use addEntityFrequency with numeric IDs, then resolve with a map
        $map = [];
        $id = 0;
        foreach ($frequencies as $page => $freq) {
            $term->addEntityFrequency($id, $freq);
            $map[$id] = $page;
            $id++;
        }
        $term->resolveEntities($map);
        return $term;
    }

    // region Basic word lookups

    /**
     * A single word token yields the frequencies of its term unchanged
     */
    public function testSingleWord(): void
    {
        $terms = [
            'dokuwiki' => $this->makeTerm('dokuwiki', ['page1' => 3, 'page2' => 1]),
        ];
        $rpn = ['W+:dokuwiki'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals(['page1' => 3, 'page2' => 1], $result);
    }

    /**
     * A word without a matching entry in the term map yields an empty result
     */
    public function testUnknownWord(): void
    {
        $terms = [];
        $rpn = ['W+:nonexistent'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals([], $result);
    }

    // endregion

    // region AND operation

    /**
     * AND keeps only pages present in both operands and sums their scores
     */
    public function testAndTwoWords(): void
    {
        $terms = [
            'foo' => $this->makeTerm('foo', ['page1' => 2, 'page2' => 3, 'page3' => 1]),
            'bar' => $this->makeTerm('bar', ['page1' => 1, 'page3' => 4]),
        ];
        // foo AND bar → pages in both, scores summed
        $rpn = ['W+:foo', 'W+:bar', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals(['page1' => 3, 'page3' => 5], $result);
    }

    // endregion

    // region OR operation

    /**
     * OR returns the union of both operands, summing scores of shared pages
     */
    public function testOrTwoWords(): void
    {
        $terms = [
            'foo' => $this->makeTerm('foo', ['page1' => 2, 'page2' => 3]),
            'bar' => $this->makeTerm('bar', ['page1' => 1, 'page3' => 4]),
        ];
        // foo OR bar → union, scores summed where overlapping
        $rpn = ['W+:foo', 'W+:bar', 'OR'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals(['page1' => 3, 'page2' => 3, 'page3' => 4], $result);
    }

    // endregion

    // region NOT with AND (subtraction)

    /**
     * A negated word combined via AND removes its pages from the other operand
     *
     * The scores of the remaining pages are expected to stay untouched.
     */
    public function testNotWithAnd(): void
    {
        // "foo -bar" → foo AND NOT bar → foo minus bar
        $terms = [
            'foo' => $this->makeTerm('foo', ['page1' => 2, 'page2' => 3, 'page3' => 1]),
            'bar' => $this->makeTerm('bar', ['page2' => 1]),
        ];
        // RPN: foo bar NOT AND
        $rpn = ['W+:foo', 'W-:bar', 'NOT', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals(['page1' => 2, 'page3' => 1], $result);
    }

    /**
     * A negated group may appear as the first AND operand and still subtracts
     *
     * Here the NOT wraps the result of an OR and is pushed before the
     * positive word it is combined with.
     */
    public function testNegatedGroupWithAnd(): void
    {
        // "baz -(foo OR bar)" → baz AND NOT(foo OR bar) → baz minus (foo ∪ bar)
        $terms = [
            'foo' => $this->makeTerm('foo', ['page1' => 1, 'page2' => 2]),
            'bar' => $this->makeTerm('bar', ['page2' => 1, 'page3' => 3]),
            'baz' => $this->makeTerm('baz', ['page1' => 5, 'page2' => 4, 'page3' => 2, 'page4' => 1]),
        ];
        // RPN: foo bar OR NOT baz AND
        $rpn = ['W+:foo', 'W+:bar', 'OR', 'NOT', 'W+:baz', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        // page1, page2, page3 are in (foo ∪ bar), so only page4 remains
        $this->assertEquals(['page4' => 1], $result);
    }

    // endregion

    // region Namespace filtering

    /**
     * AND with a namespace keeps only pages inside it, including sub namespaces
     */
    public function testNamespaceInclude(): void
    {
        // "foo @wiki:" → foo AND namespace wiki:
        $terms = [
            'foo' => $this->makeTerm('foo', ['wiki:page1' => 2, 'other:page2' => 3, 'wiki:sub:page3' => 1]),
        ];
        // RPN: foo N+:wiki AND
        $rpn = ['W+:foo', 'N+:wiki', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals(['wiki:page1' => 2, 'wiki:sub:page3' => 1], $result);
    }

    /**
     * AND with a negated namespace drops all pages inside that namespace
     */
    public function testNamespaceExclude(): void
    {
        // "foo ^wiki:" → foo AND NOT namespace wiki:
        $terms = [
            'foo' => $this->makeTerm('foo', ['wiki:page1' => 2, 'other:page2' => 3, 'wiki:sub:page3' => 1]),
        ];
        // RPN: foo N+:wiki NOT AND
        $rpn = ['W+:foo', 'N+:wiki', 'NOT', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals(['other:page2' => 3], $result);
    }

    // endregion

    // region Combined queries

    /**
     * A negated word is subtracted from the result of a preceding OR group
     */
    public function testOrThenNot(): void
    {
        // "(foo OR bar) -baz" → (foo OR bar) AND NOT baz
        $terms = [
            'foo' => $this->makeTerm('foo', ['page1' => 2, 'page2' => 1]),
            'bar' => $this->makeTerm('bar', ['page2' => 3, 'page3' => 4]),
            'baz' => $this->makeTerm('baz', ['page2' => 1]),
        ];
        // RPN: foo bar OR baz NOT AND
        $rpn = ['W+:foo', 'W+:bar', 'OR', 'W-:baz', 'NOT', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals(['page1' => 2, 'page3' => 4], $result);
    }

    /**
     * Word subtraction and namespace filtering can be chained in one query
     */
    public function testWordWithNamespaceAndNot(): void
    {
        // "foo -bar @wiki:" → foo AND NOT bar AND @wiki:
        $terms = [
            'foo' => $this->makeTerm('foo', [
                'wiki:a' => 5, 'wiki:b' => 3, 'other:c' => 2, 'wiki:d' => 1,
            ]),
            'bar' => $this->makeTerm('bar', ['wiki:b' => 1]),
        ];
        // RPN: foo bar NOT AND N+:wiki AND
        $rpn = ['W+:foo', 'W-:bar', 'NOT', 'AND', 'N+:wiki', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        // foo minus bar = wiki:a, other:c, wiki:d
        // filtered to wiki: = wiki:a, wiki:d
        $this->assertEquals(['wiki:a' => 5, 'wiki:d' => 1], $result);
    }

    /**
     * A namespace filter matches whole namespace segments, not string prefixes
     */
    public function testNamespaceDoesNotMatchPartialPrefix(): void
    {
        // @foo should not match pages in foobar: namespace
        $terms = [
            'test' => $this->makeTerm('test', [
                'foo:page1' => 1,
                'foobar:page2' => 2,
                'foo:sub:page3' => 3,
            ]),
        ];
        // RPN: test N+:foo AND
        $rpn = ['W+:test', 'N+:foo', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        // foobar:page2 must NOT match — only foo: prefix pages
        $this->assertEquals(['foo:page1' => 1, 'foo:sub:page3' => 3], $result);
    }

    // endregion

    // region Empty result cases

    /**
     * AND of two words without any common page yields an empty result
     */
    public function testAndNoOverlap(): void
    {
        $terms = [
            'foo' => $this->makeTerm('foo', ['page1' => 1]),
            'bar' => $this->makeTerm('bar', ['page2' => 1]),
        ];
        $rpn = ['W+:foo', 'W+:bar', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals([], $result);
    }

    /**
     * Subtracting a word that covers every page of the other word leaves nothing
     */
    public function testNotRemovesAll(): void
    {
        $terms = [
            'foo' => $this->makeTerm('foo', ['page1' => 1]),
            'bar' => $this->makeTerm('bar', ['page1' => 2]),
        ];
        // foo -bar where bar covers all foo pages
        $rpn = ['W+:foo', 'W-:bar', 'NOT', 'AND'];

        $evaluator = new QueryEvaluator($rpn, $terms);
        $result = $evaluator->evaluate();

        $this->assertEquals([], $result);
    }

    /**
     * Evaluating an empty token list with no terms yields an empty result
     */
    public function testEmptyRpn(): void
    {
        $evaluator = new QueryEvaluator([], []);
        $result = $evaluator->evaluate();

        $this->assertEquals([], $result);
    }

    // endregion
}