<?php

namespace dokuwiki\test\Parsing\Lexer;

use dokuwiki\Parsing\Lexer\Lexer;

/**
 * Tests for the mode-aware Lexer.
 *
 * Every test feeds a document through a Lexer wired to a RecordingHandler and
 * compares the handler's `recorded` list against the expected calls. Each
 * recorded row is compared as [handler name, matched text, lexer state, position].
 */
class LexerTest extends \DokuWikiTest
{
    /**
     * Without any registered pattern, parsing is expected to fail and record nothing.
     */
    public function testNoPatterns(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler);
        $this->assertFalse($lexer->parse("abcdef"));
        $this->assertSame([], $handler->recorded);
    }

    /**
     * An empty document is expected to parse successfully without any handler call.
     */
    public function testEmptyPage(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler);
        $lexer->addPattern("a+");
        $this->assertTrue($lexer->parse(""));
        $this->assertSame([], $handler->recorded);
    }

    /**
     * A single pattern splits the input into alternating matched and unmatched tokens.
     */
    public function testSinglePattern(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler);
        $lexer->addPattern("a+");
        $this->assertTrue($lexer->parse("aaaxayyyaxaaaz"));
        $this->assertSame([
            ['accept', 'aaa', \DOKU_LEXER_MATCHED, 0],
            ['accept', 'x', \DOKU_LEXER_UNMATCHED, 3],
            ['accept', 'a', \DOKU_LEXER_MATCHED, 4],
            ['accept', 'yyy', \DOKU_LEXER_UNMATCHED, 5],
            ['accept', 'a', \DOKU_LEXER_MATCHED, 8],
            ['accept', 'x', \DOKU_LEXER_UNMATCHED, 9],
            ['accept', 'aaa', \DOKU_LEXER_MATCHED, 10],
            ['accept', 'z', \DOKU_LEXER_UNMATCHED, 13],
        ], $handler->recorded);
    }

    /**
     * Two patterns in the same mode both produce tokens; only the token text is compared here.
     */
    public function testMultiplePattern(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler);
        $lexer->addPattern("a+");
        $lexer->addPattern("b+");
        $this->assertTrue($lexer->parse("ababbxbaxxxxxxax"));

        $expected = ['a', 'b', 'a', 'bb', 'x', 'b', 'a', 'xxxxxx', 'a', 'x'];
        // Column 1 of every recorded row holds the token text.
        $actual = array_column($handler->recorded, 1);
        $this->assertSame($expected, $actual);
    }

    /**
     * A pattern registered for a mode that is never entered must not match:
     * the "b" runs are expected to be reported as unmatched text of mode "a".
     */
    public function testIsolatedPattern(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addPattern("b+", "b");
        $this->assertTrue($lexer->parse("abaabxbaaaxaaaax"));
        $this->assertSame([
            ['a', 'a', \DOKU_LEXER_MATCHED, 0],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 1],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 2],
            ['a', 'bxb', \DOKU_LEXER_UNMATCHED, 4],
            ['a', 'aaa', \DOKU_LEXER_MATCHED, 7],
            ['a', 'x', \DOKU_LEXER_UNMATCHED, 10],
            ['a', 'aaaa', \DOKU_LEXER_MATCHED, 11],
            ['a', 'x', \DOKU_LEXER_UNMATCHED, 15],
        ], $handler->recorded);
    }

    /**
     * An entry pattern switches from mode "a" to mode "b"; afterwards only
     * the patterns of mode "b" are expected to match.
     */
    public function testModeChange(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addEntryPattern(":", "a", "b");
        $lexer->addPattern("b+", "b");
        $this->assertTrue($lexer->parse("abaabaaa:ababbabbba"));
        $this->assertSame([
            ['a', 'a', \DOKU_LEXER_MATCHED, 0],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 1],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 2],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 4],
            ['a', 'aaa', \DOKU_LEXER_MATCHED, 5],
            ['b', ':', \DOKU_LEXER_ENTER, 8],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 9],
            ['b', 'b', \DOKU_LEXER_MATCHED, 10],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 11],
            ['b', 'bb', \DOKU_LEXER_MATCHED, 12],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 14],
            ['b', 'bbb', \DOKU_LEXER_MATCHED, 15],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 18],
        ], $handler->recorded);
    }

    /**
     * Entering mode "b" via "(" and leaving it via ")" is expected to return to mode "a".
     */
    public function testNesting(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addEntryPattern("(", "a", "b");
        $lexer->addPattern("b+", "b");
        $lexer->addExitPattern(")", "b");
        $this->assertTrue($lexer->parse("aabaab(bbabb)aab"));
        $this->assertSame([
            ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 2],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 3],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 5],
            ['b', '(', \DOKU_LEXER_ENTER, 6],
            ['b', 'bb', \DOKU_LEXER_MATCHED, 7],
            ['b', 'a', \DOKU_LEXER_UNMATCHED, 9],
            ['b', 'bb', \DOKU_LEXER_MATCHED, 10],
            ['b', ')', \DOKU_LEXER_EXIT, 12],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 13],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 15],
        ], $handler->recorded);
    }

    /**
     * A special pattern reports its match to the special mode's handler
     * and the following tokens are expected to be reported in mode "a" again.
     */
    public function testSingular(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addSpecialPattern("b+", "a", "b");
        $this->assertTrue($lexer->parse("aabaaxxbbbxx"));
        $this->assertSame([
            ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
            ['b', 'b', \DOKU_LEXER_SPECIAL, 2],
            ['a', 'aa', \DOKU_LEXER_MATCHED, 3],
            ['a', 'xx', \DOKU_LEXER_UNMATCHED, 5],
            ['b', 'bbb', \DOKU_LEXER_SPECIAL, 7],
            ['a', 'xx', \DOKU_LEXER_UNMATCHED, 10],
        ], $handler->recorded);
    }

    /**
     * An exit pattern hit while still in the start mode is expected to make
     * parse() return false, with nothing recorded after the exit token.
     */
    public function testUnwindTooFar(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addExitPattern(")", "a");
        $this->assertFalse($lexer->parse("aa)aa"));
        $this->assertSame([
            ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
            ['a', ')', \DOKU_LEXER_EXIT, 2],
        ], $handler->recorded);
    }

    /**
     * With both modes mapped to handler "a", every token is expected to be
     * reported under that handler name regardless of the active mode.
     */
    public function testModeMapping(): void
    {
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "mode_a");
        $lexer->addPattern("a+", "mode_a");
        $lexer->addEntryPattern("(", "mode_a", "mode_b");
        $lexer->addPattern("b+", "mode_b");
        $lexer->addExitPattern(")", "mode_b");
        $lexer->mapHandler("mode_a", "a");
        $lexer->mapHandler("mode_b", "a");
        $this->assertTrue($lexer->parse("aa(bbabb)b"));
        $this->assertSame([
            ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
            ['a', '(', \DOKU_LEXER_ENTER, 2],
            ['a', 'bb', \DOKU_LEXER_MATCHED, 3],
            ['a', 'a', \DOKU_LEXER_UNMATCHED, 5],
            ['a', 'bb', \DOKU_LEXER_MATCHED, 6],
            ['a', ')', \DOKU_LEXER_EXIT, 8],
            ['a', 'b', \DOKU_LEXER_UNMATCHED, 9],
        ], $handler->recorded);
    }

    /**
     * Reported positions are checked against strpos() for plain entry/exit patterns.
     */
    public function testIndex(): void
    {
        $this->assertFileTagTokens('<file>', '</file>');
    }

    /**
     * A positive lookahead on the entry pattern must not change the reported match or positions.
     */
    public function testIndexLookaheadEqual(): void
    {
        $this->assertFileTagTokens('<file>(?=.*</file>)', '</file>');
    }

    /**
     * A negative lookahead on the entry pattern must not change the reported match or positions.
     */
    public function testIndexLookaheadNotEqual(): void
    {
        $this->assertFileTagTokens('<file>(?!foo)', '</file>');
    }

    /**
     * A positive lookbehind on the exit pattern must not change the reported match or positions.
     */
    public function testIndexLookbehindEqual(): void
    {
        $this->assertFileTagTokens('<file>', '(?<=d)</file>');
    }

    /**
     * A negative lookbehind on the exit pattern must not change the reported match or positions.
     */
    public function testIndexLookbehindNotEqual(): void
    {
        $this->assertFileTagTokens('<file>', '(?<!c)</file>');
    }

    /**
     * This test is primarily to ensure the correct match is chosen
     * when there are non-captured elements in the pattern.
     *
     * The word boundary assertions must skip the "FOO" inside "FOOLS" and
     * only report the standalone word at the end of the document.
     */
    public function testIndexSelectCorrectMatch(): void
    {
        $doc = "ALL FOOLS ARE FOO";
        $pattern = '\bFOO\b';
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, "ignore");
        $lexer->addSpecialPattern($pattern, 'ignore', 'caught');
        $this->assertTrue($lexer->parse($doc));

        $caught = $this->caughtCalls($handler);

        // Reference offset straight from PCRE; make sure it actually matched
        // before $matches is dereferenced below.
        $matches = [];
        $found = preg_match('/' . $pattern . '/', $doc, $matches, PREG_OFFSET_CAPTURE);
        $this->assertSame(1, $found);

        $this->assertCount(1, $caught);
        $this->assertSame('FOO', $caught[0][1]);
        $this->assertSame(\DOKU_LEXER_SPECIAL, $caught[0][2]);
        $this->assertSame($matches[0][1], $caught[0][3]);
    }

    /**
     * Lex the shared `<file>` fixture document and verify what reaches the 'caught' handler.
     *
     * The lexer starts in mode 'ignore', enters 'caught' through $entryPattern and
     * leaves it through $exitPattern. Inside 'caught', 'b' is a special pattern
     * (mode 'special', mapped back onto the 'caught' handler) and 'c' a plain pattern.
     * Regardless of lookaround assertions in the given patterns, the recorded text
     * must be the bare tags and the positions must equal strpos() in the document.
     *
     * @param string $entryPattern regex that opens the 'caught' mode
     * @param string $exitPattern regex that closes the 'caught' mode
     */
    private function assertFileTagTokens(string $entryPattern, string $exitPattern): void
    {
        $doc = 'aaa<file>bcd</file>eee';
        $handler = new RecordingHandler();
        $lexer = new Lexer($handler, 'ignore');
        $lexer->addEntryPattern($entryPattern, 'ignore', 'caught');
        $lexer->addExitPattern($exitPattern, 'caught');
        $lexer->addSpecialPattern('b', 'caught', 'special');
        $lexer->mapHandler('special', 'caught');
        $lexer->addPattern('c', 'caught');
        $this->assertTrue($lexer->parse($doc));

        $this->assertSame([
            ['caught', '<file>', \DOKU_LEXER_ENTER, strpos($doc, '<file>')],
            ['caught', 'b', \DOKU_LEXER_SPECIAL, strpos($doc, 'b')],
            ['caught', 'c', \DOKU_LEXER_MATCHED, strpos($doc, 'c')],
            ['caught', 'd', \DOKU_LEXER_UNMATCHED, strpos($doc, 'd')],
            ['caught', '</file>', \DOKU_LEXER_EXIT, strpos($doc, '</file>')],
        ], $this->caughtCalls($handler));
    }

    /**
     * Return only the recorded calls that went to the 'caught' handler, reindexed from 0.
     *
     * @param RecordingHandler $handler handler whose `recorded` rows are filtered
     * @return array[] rows whose first element is 'caught'
     */
    private function caughtCalls(RecordingHandler $handler): array
    {
        // array_filter() preserves keys, so reindex to allow list comparison.
        return array_values(array_filter(
            $handler->recorded,
            static fn($call) => $call[0] === 'caught'
        ));
    }
}