xref: /dokuwiki/_test/tests/Parsing/Lexer/ParallelRegexTest.php (revision 75364f13219a5af44f52c564ea0a62df64c3a17f)
1<?php
2
3namespace dokuwiki\test\Parsing\Lexer;
4
5use dokuwiki\Parsing\Lexer\ParallelRegex;
6
/**
 * Tests for the lexer's ParallelRegex, which tries several patterns against
 * one subject at once.
 *
 * Conventions exercised by the tests below:
 *  - The constructor flag selects case sensitivity (true = case sensitive).
 *  - split() fills its by-reference second argument with
 *    [0 => text before the match, 1 => matched text, 2 => text after].
 *  - split() returns false when nothing matches, true for an unlabelled
 *    pattern, and the label when the matching pattern was added with one.
 *
 * String expectations use assertSame() so that a null or false slot cannot
 * pass as an empty string through loose comparison.
 */
class ParallelRegexTest extends \DokuWikiTest
{
    /**
     * With no patterns registered, split() reports no match.
     */
    public function testNoPatterns()
    {
        $regex = new ParallelRegex(false);
        $this->assertFalse($regex->split("Hello", $split));
    }

    /**
     * A pattern that can match nothing still matches an empty subject.
     */
    public function testNoSubject()
    {
        $regex = new ParallelRegex(false);
        $regex->addPattern(".*");
        $this->assertTrue($regex->split("", $split));
        $this->assertSame("", $split[1]);
    }

    /**
     * A catch-all pattern consumes the whole subject.
     */
    public function testMatchAll()
    {
        $regex = new ParallelRegex(false);
        $regex->addPattern(".*");
        $this->assertTrue($regex->split("Hello", $split));
        $this->assertSame("Hello", $split[1]);
    }

    /**
     * In case-sensitive mode the upper-case "ABC" is skipped and the later
     * lower-case "abc" is the match.
     */
    public function testCaseSensitive()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("abc");
        $this->assertTrue($regex->split("abcdef", $split));
        $this->assertSame("abc", $split[1]);

        $regex = new ParallelRegex(true);
        $regex->addPattern("abc");
        $this->assertTrue($regex->split("AAABCabcdef", $split));
        $this->assertSame("abc", $split[1]);
    }

    /**
     * In case-insensitive mode the earlier upper-case "ABC" is the match and
     * is returned in its original case.
     */
    public function testCaseInsensitive()
    {
        $regex = new ParallelRegex(false);
        $regex->addPattern("abc");
        $this->assertTrue($regex->split("abcdef", $split));
        $this->assertSame("abc", $split[1]);

        $regex = new ParallelRegex(false);
        $regex->addPattern("abc");
        $this->assertTrue($regex->split("AAABCabcdef", $split));
        $this->assertSame("ABC", $split[1]);
    }

    /**
     * With several patterns, the one matching earliest in the subject wins;
     * a subject none of them matches yields false.
     */
    public function testMatchMultiple()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("abc");
        $regex->addPattern("ABC");

        $this->assertTrue($regex->split("abcdef", $split));
        $this->assertSame("abc", $split[1]);

        // "ABC" starts at offset 2, before "abc" at offset 5.
        $this->assertTrue($regex->split("AAABCabcdef", $split));
        $this->assertSame("ABC", $split[1]);

        $this->assertFalse($regex->split("Hello", $split));
    }

    /**
     * When patterns carry labels, split() returns the label of the pattern
     * that matched instead of true.
     */
    public function testPatternLabels()
    {
        $regex = new ParallelRegex(false);
        $regex->addPattern("abc", "letter");
        $regex->addPattern("123", "number");

        $this->assertSame("letter", $regex->split("abcdef", $split));
        $this->assertSame("abc", $split[1]);

        $this->assertSame("number", $regex->split("0123456789", $split));
        $this->assertSame("123", $split[1]);
    }

    /**
     * A pattern containing a negative lookahead can be combined with plain
     * patterns without disturbing them.
     */
    public function testMatchMultipleWithLookaheadNot()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("abc");
        $regex->addPattern("ABC");
        // Double-quoted on purpose: the pattern contains a real newline.
        $regex->addPattern("a(?!\n).{1}");

        $this->assertTrue($regex->split("abcdef", $split));
        $this->assertSame("abc", $split[1]);

        $this->assertTrue($regex->split("AAABCabcdef", $split));
        $this->assertSame("ABC", $split[1]);

        // The first "a" is followed by a newline and is rejected by the
        // lookahead; the second "a" plus one more character is the match.
        $this->assertTrue($regex->split("a\nab", $split));
        $this->assertSame("ab", $split[1]);

        $this->assertFalse($regex->split("Hello", $split));
    }

    /**
     * An inline (?i) option works inside a case-sensitive regex.
     */
    public function testMatchSetOptionCaseless()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("a(?i)b(?i)c");
        $this->assertTrue($regex->split("aBc", $split));
        $this->assertSame("aBc", $split[1]);
    }

    /**
     * An inline (?U) option makes the quantifier ungreedy, so only a single
     * character is matched.
     */
    public function testMatchSetOptionUngreedy()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("(?U)\w+");
        $this->assertTrue($regex->split("aaaaaa", $split));
        $this->assertSame("a", $split[1]);
    }

    /**
     * Positive lookahead: the first word character directly followed by "c".
     */
    public function testMatchLookaheadEqual()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("\w(?=c)");
        $this->assertTrue($regex->split("xbyczd", $split));
        $this->assertSame("y", $split[1]);
    }

    /**
     * Negative lookahead: "x" is followed by "b" and is skipped, so "b"
     * (followed by "y") is the first match.
     */
    public function testMatchLookaheadNot()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("\w(?!b|c)");
        $this->assertTrue($regex->split("xbyczd", $split));
        $this->assertSame("b", $split[1]);
    }

    /**
     * Positive lookbehind: the word character directly preceded by "c".
     */
    public function testMatchLookbehindEqual()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("(?<=c)\w");
        $this->assertTrue($regex->split("xbyczd", $split));
        $this->assertSame("z", $split[1]);
    }

    /**
     * Negative lookbehind: "x" sits at the subject start, "b" follows "x"
     * and "y" follows "b", so all three are rejected and "c" is the match.
     */
    public function testMatchLookbehindNot()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("(?<!\A|x|b)\w");
        $this->assertTrue($regex->split("xbyczd", $split));
        $this->assertSame("c", $split[1]);
    }

    /**
     * split() reports the text before the match, the match itself and the
     * text after it, in that order.
     */
    public function testSplitReturnsPreAndPostMatch()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("abc");
        $this->assertTrue($regex->split("xxxabcyyy", $split));
        $this->assertSame("xxx", $split[0]);
        $this->assertSame("abc", $split[1]);
        $this->assertSame("yyy", $split[2]);
    }

    /**
     * A lookbehind must be able to inspect bytes located before the offset
     * passed to split().
     */
    public function testSplitWithOffsetSeesLookbehindBeforeOffset()
    {
        // Regression: inline-formatting closers like `(?<=[^\s])\*\*` must
        // see the preceding non-whitespace character even when it was part
        // of a previously matched token (e.g. the `]` of a `[[link]]`).
        // With `$offset`, the full subject is passed to PCRE and the
        // lookbehind works against bytes before the offset.
        $regex = new ParallelRegex(true);
        $regex->addPattern('(?<=[^\s])\*\*');

        // Without offset: subject starts with `**`, lookbehind fails.
        $this->assertFalse((bool) $regex->split("** bar", $split));

        // With offset: full subject passed, lookbehind at offset 10 sees `]`.
        $this->assertTrue((bool) $regex->split("**[[link]]** bar", $split, 10));
        // The match starts exactly at the offset, so nothing precedes it.
        $this->assertSame("", $split[0]);
        $this->assertSame("**", $split[1]);
    }

    /**
     * With an offset, the pre-match part starts at the offset rather than at
     * the beginning of the subject.
     */
    public function testSplitWithOffsetPreIsBetweenOffsetAndMatch()
    {
        $regex = new ParallelRegex(true);
        $regex->addPattern("abc");
        // Match at position 6, offset at 3 — pre is "yyy" (bytes 3..5).
        $this->assertTrue((bool) $regex->split("xxxyyyabczzz", $split, 3));
        $this->assertSame("yyy", $split[0]);
        $this->assertSame("abc", $split[1]);
        $this->assertSame("zzz", $split[2]);
    }
}
181