<?php

// xref: /dokuwiki/_test/tests/Parsing/Lexer/LexerTest.php (revision 504c13e8df88563c11b3720b317991bc38835a35)

namespace dokuwiki\test\Parsing\Lexer;

use dokuwiki\Parsing\Lexer\Lexer;

/**
 * Tests for the DokuWiki Lexer.
 *
 * Every test feeds a small document through a Lexer that is wired to a
 * RecordingHandler and then compares $handler->recorded with the expected
 * list of records. As the assertions below show, one record has the shape
 *
 *     [name, matched text, DOKU_LEXER_* state, offset of the text in the input]
 *
 * where name is 'accept' when the Lexer is created without a start mode and
 * otherwise the mode name (or the alias registered through mapHandler()).
 */
class LexerTest extends \DokuWikiTest
{
    /**
     * With no pattern registered, parse() returns false and records nothing.
     */
    public function testNoPatterns(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler);
        $this->assertFalse($lexer->parse("abcdef"));
        $this->assertSame([], $handler->recorded);
    }

    /**
     * Parsing an empty string succeeds without producing any record.
     */
    public function testEmptyPage(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler);
        $lexer->addPattern("a+");
        $this->assertTrue($lexer->parse(""));
        $this->assertSame([], $handler->recorded);
    }

    /**
     * A single pattern splits the input into alternating matched and
     * unmatched chunks, each reported with its offset.
     */
    public function testSinglePattern(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler);
        $lexer->addPattern("a+");
        $this->assertTrue($lexer->parse("aaaxayyyaxaaaz"));
        $this->assertSame([
            ['accept', 'aaa', \DOKU_LEXER_MATCHED, 0],
            ['accept', 'x', \DOKU_LEXER_UNMATCHED, 3],
            ['accept', 'a', \DOKU_LEXER_MATCHED, 4],
            ['accept', 'yyy', \DOKU_LEXER_UNMATCHED, 5],
            ['accept', 'a', \DOKU_LEXER_MATCHED, 8],
            ['accept', 'x', \DOKU_LEXER_UNMATCHED, 9],
            ['accept', 'aaa', \DOKU_LEXER_MATCHED, 10],
            ['accept', 'z', \DOKU_LEXER_UNMATCHED, 13],
        ], $handler->recorded);
    }

    /**
     * Two patterns in the same mode are both applied; only the sequence of
     * recorded texts is checked here.
     */
    public function testMultiplePattern(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler);
        $lexer->addPattern("a+");
        $lexer->addPattern("b+");
        $this->assertTrue($lexer->parse("ababbxbaxxxxxxax"));
        $expected = ['a', 'b', 'a', 'bb', 'x', 'b', 'a', 'xxxxxx', 'a', 'x'];
        // Column 1 of every record is the text that was handed to the handler.
        $actual = array_column($handler->recorded, 1);
        $this->assertSame($expected, $actual);
    }

    /**
     * A pattern registered for a mode that is never entered ("b") is not
     * applied: runs of "b" are reported as unmatched text of mode "a".
     */
    public function testIsolatedPattern(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addPattern("b+", "b");
        $this->assertTrue($lexer->parse("abaabxbaaaxaaaax"));
        $this->assertSame([
            ['a', 'a', \DOKU_LEXER_MATCHED, 0],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 1],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 2],
            ['a', 'bxb', \DOKU_LEXER_UNMATCHED, 4],
            ['a', 'aaa', \DOKU_LEXER_MATCHED, 7],
            ['a', 'x', \DOKU_LEXER_UNMATCHED, 10],
            ['a', 'aaaa', \DOKU_LEXER_MATCHED, 11],
            ['a', 'x', \DOKU_LEXER_UNMATCHED, 15],
        ], $handler->recorded);
    }

    /**
     * An entry pattern switches from mode "a" to mode "b"; from the ENTER
     * record onwards the patterns of "b" apply and "a" runs are unmatched.
     */
    public function testModeChange(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addEntryPattern(":", "a", "b");
        $lexer->addPattern("b+", "b");
        $this->assertTrue($lexer->parse("abaabaaa:ababbabbba"));
        $this->assertSame([
            ['a', 'a', \DOKU_LEXER_MATCHED, 0],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 1],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 2],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 4],
            ['a', 'aaa', \DOKU_LEXER_MATCHED, 5],
            ['b', ':', \DOKU_LEXER_ENTER, 8],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 9],
            ['b', 'b', \DOKU_LEXER_MATCHED, 10],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 11],
            ['b', 'bb', \DOKU_LEXER_MATCHED, 12],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 14],
            ['b', 'bbb', \DOKU_LEXER_MATCHED, 15],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 18],
        ], $handler->recorded);
    }

    /**
     * Entry and exit patterns nest mode "b" inside mode "a"; after the EXIT
     * record the lexer is back in mode "a".
     */
    public function testNesting(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addEntryPattern("(", "a", "b");
        $lexer->addPattern("b+", "b");
        $lexer->addExitPattern(")", "b");
        $this->assertTrue($lexer->parse("aabaab(bbabb)aab"));
        $this->assertSame([
            ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 2],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 3],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 5],
            ['b', '(', \DOKU_LEXER_ENTER, 6],
            ['b', 'bb', \DOKU_LEXER_MATCHED, 7],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 9],
            ['b', 'bb', \DOKU_LEXER_MATCHED, 10],
            ['b', ')', \DOKU_LEXER_EXIT, 12],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 13],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 15],
        ], $handler->recorded);
    }

    /**
     * A special pattern is reported once under its own name ("b") with the
     * SPECIAL state; the surrounding text stays in mode "a".
     */
    public function testSingular(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addSpecialPattern("b+", "a", "b");
        $this->assertTrue($lexer->parse("aabaaxxbbbxx"));
        $this->assertSame([
            ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
            ['b', 'b', \DOKU_LEXER_SPECIAL, 2],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 3],
            ['a', 'xx', \DOKU_LEXER_UNMATCHED, 5],
            ['b', 'bbb', \DOKU_LEXER_SPECIAL, 7],
            ['a', 'xx', \DOKU_LEXER_UNMATCHED, 10],
        ], $handler->recorded);
    }

    /**
     * An exit pattern hit while still in the start mode makes parse() return
     * false; nothing after the EXIT record is recorded.
     */
    public function testUnwindTooFar(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addExitPattern(")", "a");
        $this->assertFalse($lexer->parse("aa)aa"));
        $this->assertSame([
            ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
            ['a', ')', \DOKU_LEXER_EXIT, 2],
        ], $handler->recorded);
    }

    /**
     * mapHandler() makes both modes report under the same name ("a") while
     * the mode switching itself keeps working.
     */
    public function testModeMapping(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "mode_a");
        $lexer->addPattern("a+", "mode_a");
        $lexer->addEntryPattern("(", "mode_a", "mode_b");
        $lexer->addPattern("b+", "mode_b");
        $lexer->addExitPattern(")", "mode_b");
        $lexer->mapHandler("mode_a", "a");
        $lexer->mapHandler("mode_b", "a");
        $this->assertTrue($lexer->parse("aa(bbabb)b"));
        $this->assertSame([
            ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
            ['a', '(', \DOKU_LEXER_ENTER, 2],
            ['a', 'bb', \DOKU_LEXER_MATCHED, 3],
            ['a', 'a', \DOKU_LEXER_UNMATCHED, 5],
            ['a', 'bb', \DOKU_LEXER_MATCHED, 6],
            ['a', ')', \DOKU_LEXER_EXIT, 8],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 9],
        ], $handler->recorded);
    }

    /**
     * Offsets are reported correctly for plain entry/exit patterns.
     */
    public function testIndex(): void
    {
        $this->assertFileBlockOffsets("<file>", "</file>");
    }

    /**
     * A positive lookahead on the entry pattern does not shift the offsets
     * and is not part of the reported match.
     */
    public function testIndexLookaheadEqual(): void
    {
        $this->assertFileBlockOffsets('<file>(?=.*</file>)', "</file>");
    }

    /**
     * A negative lookahead on the entry pattern does not shift the offsets.
     */
    public function testIndexLookaheadNotEqual(): void
    {
        $this->assertFileBlockOffsets('<file>(?!foo)', "</file>");
    }

    /**
     * A positive lookbehind on the exit pattern does not shift the offsets
     * and is not part of the reported match.
     */
    public function testIndexLookbehindEqual(): void
    {
        $this->assertFileBlockOffsets('<file>', "(?<=d)</file>");
    }

    /**
     * A negative lookbehind on the exit pattern does not shift the offsets.
     */
    public function testIndexLookbehindNotEqual(): void
    {
        $this->assertFileBlockOffsets('<file>', '(?<!c)</file>');
    }

    /**
     * This test is primarily to ensure the correct match is chosen
     * when there are non-captured elements in the pattern.
     */
    public function testIndexSelectCorrectMatch(): void
    {
        $doc = "ALL FOOLS ARE FOO";
        $pattern = '\bFOO\b';
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "ignore");
        $lexer->addSpecialPattern($pattern, 'ignore', 'caught');
        $this->assertTrue($lexer->parse($doc));

        // Reference offset straight from PCRE; fail loudly if the reference
        // match itself does not succeed instead of reading an undefined index.
        $matches = [];
        $found = preg_match('/' . $pattern . '/', $doc, $matches, PREG_OFFSET_CAPTURE);
        $this->assertSame(1, $found);

        $caught = $this->recordsNamed($handler, 'caught');
        $this->assertCount(1, $caught);
        $this->assertSame('FOO', $caught[0][1]);
        $this->assertSame(\DOKU_LEXER_SPECIAL, $caught[0][2]);
        $this->assertSame($matches[0][1], $caught[0][3]);
    }

    /**
     * Shared body of the testIndex* tests.
     *
     * Lexes "aaa<file>bcd</file>eee" with the given entry and exit patterns
     * plus a special pattern for "b" (mapped onto 'caught') and a plain
     * pattern for "c", then asserts that the records named 'caught' carry the
     * offsets of the corresponding substrings in the document.
     *
     * @param string $entryPattern pattern that enters mode 'caught' from mode 'ignore'
     * @param string $exitPattern  pattern that leaves mode 'caught'
     */
    private function assertFileBlockOffsets(string $entryPattern, string $exitPattern): void
    {
        $doc = "aaa<file>bcd</file>eee";
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "ignore");
        $lexer->addEntryPattern($entryPattern, "ignore", "caught");
        $lexer->addExitPattern($exitPattern, "caught");
        $lexer->addSpecialPattern('b', 'caught', 'special');
        $lexer->mapHandler('special', 'caught');
        $lexer->addPattern('c', 'caught');
        $this->assertTrue($lexer->parse($doc));

        // Every expected substring occurs exactly once in $doc, so strpos()
        // yields the one offset the lexer has to report.
        $this->assertSame([
            ['caught', '<file>', \DOKU_LEXER_ENTER, strpos($doc, '<file>')],
            ['caught', 'b', \DOKU_LEXER_SPECIAL, strpos($doc, 'b')],
            ['caught', 'c', \DOKU_LEXER_MATCHED, strpos($doc, 'c')],
            ['caught', 'd', \DOKU_LEXER_UNMATCHED, strpos($doc, 'd')],
            ['caught', '</file>', \DOKU_LEXER_EXIT, strpos($doc, '</file>')],
        ], $this->recordsNamed($handler, 'caught'));
    }

    /**
     * Return the recorded entries whose first element equals $name,
     * re-indexed from zero so they can be compared with assertSame().
     *
     * @param RecordingHandler $handler handler whose public $recorded list is read
     * @param string $name value to look for in element 0 of each record
     * @return array[] the matching records in their original order
     */
    private function recordsNamed(RecordingHandler $handler, string $name): array
    {
        return array_values(array_filter(
            $handler->recorded,
            static fn($record) => $record[0] === $name
        ));
    }
}