assertFalse($lexer->parse("abcdef"));
$this->assertSame([], $handler->recorded);
}
/**
 * Parsing an empty document succeeds and nothing is recorded by the handler.
 */
function testEmptyPage()
{
    $recorder = new RecordingHandler();
    $emptyDocument = '';

    $subject = new Lexer($recorder);
    $subject->addPattern('a+');

    $parsed = $subject->parse($emptyDocument);

    $this->assertTrue($parsed);
    $this->assertSame([], $recorder->recorded);
}
/**
 * A single pattern splits the input into alternating matched and
 * unmatched tokens, each recorded with its byte offset.
 */
function testSinglePattern()
{
    $recorder = new RecordingHandler();
    $subject = new Lexer($recorder);
    $subject->addPattern('a+');

    // Every token is expected under the 'accept' mode name, since no
    // explicit start mode is passed to the lexer here.
    $expectedTokens = [
        ['accept', 'aaa', \DOKU_LEXER_MATCHED, 0],
        ['accept', 'x', \DOKU_LEXER_UNMATCHED, 3],
        ['accept', 'a', \DOKU_LEXER_MATCHED, 4],
        ['accept', 'yyy', \DOKU_LEXER_UNMATCHED, 5],
        ['accept', 'a', \DOKU_LEXER_MATCHED, 8],
        ['accept', 'x', \DOKU_LEXER_UNMATCHED, 9],
        ['accept', 'aaa', \DOKU_LEXER_MATCHED, 10],
        ['accept', 'z', \DOKU_LEXER_UNMATCHED, 13],
    ];

    $parsed = $subject->parse('aaaxayyyaxaaaz');

    $this->assertTrue($parsed);
    $this->assertSame($expectedTokens, $recorder->recorded);
}
/**
 * Two patterns registered in the same mode are both applied; only the
 * token texts (column 1 of each recorded call) are compared here.
 */
function testMultiplePattern()
{
    $recorder = new RecordingHandler();
    $subject = new Lexer($recorder);
    foreach (['a+', 'b+'] as $pattern) {
        $subject->addPattern($pattern);
    }

    $parsed = $subject->parse('ababbxbaxxxxxxax');
    $this->assertTrue($parsed);

    $this->assertSame(
        ['a', 'b', 'a', 'bb', 'x', 'b', 'a', 'xxxxxx', 'a', 'x'],
        array_column($recorder->recorded, 1)
    );
}
/**
 * A pattern registered for a mode that is never entered is not applied:
 * while in mode "a", runs of "b" are reported as unmatched text.
 */
function testIsolatedPattern()
{
    $recorder = new RecordingHandler();
    $subject = new Lexer($recorder, 'a');
    $subject->addPattern('a+', 'a');
    $subject->addPattern('b+', 'b'); // mode "b" is never entered in this test

    $expectedTokens = [
        ['a', 'a', \DOKU_LEXER_MATCHED, 0],
        ['a', 'b', \DOKU_LEXER_UNMATCHED, 1],
        ['a', 'aa', \DOKU_LEXER_MATCHED, 2],
        ['a', 'bxb', \DOKU_LEXER_UNMATCHED, 4],
        ['a', 'aaa', \DOKU_LEXER_MATCHED, 7],
        ['a', 'x', \DOKU_LEXER_UNMATCHED, 10],
        ['a', 'aaaa', \DOKU_LEXER_MATCHED, 11],
        ['a', 'x', \DOKU_LEXER_UNMATCHED, 15],
    ];

    $parsed = $subject->parse('abaabxbaaaxaaaax');

    $this->assertTrue($parsed);
    $this->assertSame($expectedTokens, $recorder->recorded);
}
/**
 * An entry pattern switches the lexer from mode "a" to mode "b"; after
 * the switch only the patterns of mode "b" are applied.
 */
function testModeChange()
{
    $recorder = new RecordingHandler();
    $subject = new Lexer($recorder, 'a');
    $subject->addPattern('a+', 'a');
    $subject->addEntryPattern(':', 'a', 'b');
    $subject->addPattern('b+', 'b');

    $tokensBeforeSwitch = [
        ['a', 'a', \DOKU_LEXER_MATCHED, 0],
        ['a', 'b', \DOKU_LEXER_UNMATCHED, 1],
        ['a', 'aa', \DOKU_LEXER_MATCHED, 2],
        ['a', 'b', \DOKU_LEXER_UNMATCHED, 4],
        ['a', 'aaa', \DOKU_LEXER_MATCHED, 5],
    ];
    // The ":" itself is reported to the target mode as the ENTER token.
    $tokensAfterSwitch = [
        ['b', ':', \DOKU_LEXER_ENTER, 8],
        ['b', 'a', \DOKU_LEXER_UNMATCHED, 9],
        ['b', 'b', \DOKU_LEXER_MATCHED, 10],
        ['b', 'a', \DOKU_LEXER_UNMATCHED, 11],
        ['b', 'bb', \DOKU_LEXER_MATCHED, 12],
        ['b', 'a', \DOKU_LEXER_UNMATCHED, 14],
        ['b', 'bbb', \DOKU_LEXER_MATCHED, 15],
        ['b', 'a', \DOKU_LEXER_UNMATCHED, 18],
    ];

    $parsed = $subject->parse('abaabaaa:ababbabbba');

    $this->assertTrue($parsed);
    $this->assertSame(
        array_merge($tokensBeforeSwitch, $tokensAfterSwitch),
        $recorder->recorded
    );
}
/**
 * Entering mode "b" via "(" and leaving it via ")" returns the lexer to
 * mode "a", whose patterns then apply again.
 */
function testNesting()
{
    $recorder = new RecordingHandler();
    $subject = new Lexer($recorder, 'a');
    $subject->addPattern('a+', 'a');
    $subject->addEntryPattern('(', 'a', 'b');
    $subject->addPattern('b+', 'b');
    $subject->addExitPattern(')', 'b');

    $expectedTokens = [
        // outer mode "a"
        ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
        ['a', 'b', \DOKU_LEXER_UNMATCHED, 2],
        ['a', 'aa', \DOKU_LEXER_MATCHED, 3],
        ['a', 'b', \DOKU_LEXER_UNMATCHED, 5],
        // nested mode "b", bracketed by ENTER and EXIT
        ['b', '(', \DOKU_LEXER_ENTER, 6],
        ['b', 'bb', \DOKU_LEXER_MATCHED, 7],
        ['b', 'a', \DOKU_LEXER_UNMATCHED, 9],
        ['b', 'bb', \DOKU_LEXER_MATCHED, 10],
        ['b', ')', \DOKU_LEXER_EXIT, 12],
        // back in mode "a"
        ['a', 'aa', \DOKU_LEXER_MATCHED, 13],
        ['a', 'b', \DOKU_LEXER_UNMATCHED, 15],
    ];

    $parsed = $subject->parse('aabaab(bbabb)aab');

    $this->assertTrue($parsed);
    $this->assertSame($expectedTokens, $recorder->recorded);
}
/**
 * A special pattern reports its match to another mode ("b") as a single
 * SPECIAL token, after which tokens are again reported under mode "a".
 */
function testSingular()
{
    $recorder = new RecordingHandler();
    $subject = new Lexer($recorder, 'a');
    $subject->addPattern('a+', 'a');
    $subject->addSpecialPattern('b+', 'a', 'b');

    $expectedTokens = [
        ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
        ['b', 'b', \DOKU_LEXER_SPECIAL, 2],
        ['a', 'aa', \DOKU_LEXER_MATCHED, 3],
        ['a', 'xx', \DOKU_LEXER_UNMATCHED, 5],
        ['b', 'bbb', \DOKU_LEXER_SPECIAL, 7],
        ['a', 'xx', \DOKU_LEXER_UNMATCHED, 10],
    ];

    $parsed = $subject->parse('aabaaxxbbbxx');

    $this->assertTrue($parsed);
    $this->assertSame($expectedTokens, $recorder->recorded);
}
/**
 * An exit pattern hit while still in the start mode makes parse() return
 * false; the tokens up to and including the EXIT are still recorded and
 * nothing after it is.
 */
function testUnwindTooFar()
{
    $recorder = new RecordingHandler();
    $subject = new Lexer($recorder, 'a');
    $subject->addPattern('a+', 'a');
    $subject->addExitPattern(')', 'a');

    $expectedTokens = [
        ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
        ['a', ')', \DOKU_LEXER_EXIT, 2],
    ];

    $parsed = $subject->parse('aa)aa');

    $this->assertFalse($parsed);
    $this->assertSame($expectedTokens, $recorder->recorded);
}
/**
 * mapHandler() redirects the tokens of a mode to a differently named
 * handler: both "mode_a" and "mode_b" are reported under "a" here.
 */
function testModeMapping()
{
    $recorder = new RecordingHandler();
    $subject = new Lexer($recorder, 'mode_a');

    $subject->addPattern('a+', 'mode_a');
    $subject->addEntryPattern('(', 'mode_a', 'mode_b');
    $subject->addPattern('b+', 'mode_b');
    $subject->addExitPattern(')', 'mode_b');

    foreach (['mode_a', 'mode_b'] as $mode) {
        $subject->mapHandler($mode, 'a');
    }

    $expectedTokens = [
        ['a', 'aa', \DOKU_LEXER_MATCHED, 0],
        ['a', '(', \DOKU_LEXER_ENTER, 2],
        ['a', 'bb', \DOKU_LEXER_MATCHED, 3],
        ['a', 'a', \DOKU_LEXER_UNMATCHED, 5],
        ['a', 'bb', \DOKU_LEXER_MATCHED, 6],
        ['a', ')', \DOKU_LEXER_EXIT, 8],
        ['a', 'b', \DOKU_LEXER_UNMATCHED, 9],
    ];

    $parsed = $subject->parse('aa(bbabb)b');

    $this->assertTrue($parsed);
    $this->assertSame($expectedTokens, $recorder->recorded);
}
/**
 * Every token type (ENTER, SPECIAL, MATCHED, UNMATCHED, EXIT) is reported
 * with the byte offset at which its text starts in the document.
 *
 * The document wraps "bcd" in <file>...</file> delimiters. Text outside the
 * delimiters is lexed in the "ignore" mode and filtered out before the
 * comparison, so only tokens delivered to "caught" are checked.
 */
function testIndex()
{
    $doc = "aaa<file>bcd</file>eee";
    $handler = new RecordingHandler();
    $lexer = new Lexer($handler, "ignore");
    $lexer->addEntryPattern("<file>", "ignore", "caught");
    $lexer->addExitPattern("</file>", "caught");
    $lexer->addSpecialPattern('b', 'caught', 'special');
    // Route the special mode to the same handler name as the enclosing mode.
    $lexer->mapHandler('special', 'caught');
    $lexer->addPattern('c', 'caught');
    $this->assertTrue($lexer->parse($doc));
    // array_filter() preserves keys; reindex so the list compares with assertSame().
    $caught = array_values(array_filter($handler->recorded, fn($c) => $c[0] === 'caught'));
    $this->assertSame([
        ['caught', '<file>', \DOKU_LEXER_ENTER, strpos($doc, '<file>')],
        ['caught', 'b', \DOKU_LEXER_SPECIAL, strpos($doc, 'b')],
        ['caught', 'c', \DOKU_LEXER_MATCHED, strpos($doc, 'c')],
        ['caught', 'd', \DOKU_LEXER_UNMATCHED, strpos($doc, 'd')],
        ['caught', '</file>', \DOKU_LEXER_EXIT, strpos($doc, '</file>')],
    ], $caught);
}
/**
 * Token offsets stay correct when the entry pattern ends in a positive
 * lookahead.
 *
 * The lookahead `(?=.*</file>)` only lets the opening delimiter match when a
 * closing delimiter follows. It is zero-width, so the ENTER token is expected
 * to carry just "<file>" at the delimiter's own offset.
 */
function testIndexLookaheadEqual()
{
    $doc = "aaa<file>bcd</file>eee";
    $handler = new RecordingHandler();
    $lexer = new Lexer($handler, "ignore");
    $lexer->addEntryPattern('<file>(?=.*</file>)', "ignore", "caught");
    $lexer->addExitPattern("</file>", "caught");
    $lexer->addSpecialPattern('b', 'caught', 'special');
    // Route the special mode to the same handler name as the enclosing mode.
    $lexer->mapHandler('special', 'caught');
    $lexer->addPattern('c', 'caught');
    $this->assertTrue($lexer->parse($doc));
    // array_filter() preserves keys; reindex so the list compares with assertSame().
    $caught = array_values(array_filter($handler->recorded, fn($c) => $c[0] === 'caught'));
    $this->assertSame([
        ['caught', '<file>', \DOKU_LEXER_ENTER, strpos($doc, '<file>')],
        ['caught', 'b', \DOKU_LEXER_SPECIAL, strpos($doc, 'b')],
        ['caught', 'c', \DOKU_LEXER_MATCHED, strpos($doc, 'c')],
        ['caught', 'd', \DOKU_LEXER_UNMATCHED, strpos($doc, 'd')],
        ['caught', '</file>', \DOKU_LEXER_EXIT, strpos($doc, '</file>')],
    ], $caught);
}
/**
 * Token offsets stay correct when the entry pattern ends in a negative
 * lookahead.
 *
 * `(?!foo)` lets the opening delimiter match unless it is directly followed
 * by "foo". It is zero-width, so the ENTER token is expected to carry just
 * "<file>" at the delimiter's own offset.
 */
function testIndexLookaheadNotEqual()
{
    $doc = "aaa<file>bcd</file>eee";
    $handler = new RecordingHandler();
    $lexer = new Lexer($handler, "ignore");
    $lexer->addEntryPattern('<file>(?!foo)', "ignore", "caught");
    $lexer->addExitPattern("</file>", "caught");
    $lexer->addSpecialPattern('b', 'caught', 'special');
    // Route the special mode to the same handler name as the enclosing mode.
    $lexer->mapHandler('special', 'caught');
    $lexer->addPattern('c', 'caught');
    $this->assertTrue($lexer->parse($doc));
    // array_filter() preserves keys; reindex so the list compares with assertSame().
    $caught = array_values(array_filter($handler->recorded, fn($c) => $c[0] === 'caught'));
    $this->assertSame([
        ['caught', '<file>', \DOKU_LEXER_ENTER, strpos($doc, '<file>')],
        ['caught', 'b', \DOKU_LEXER_SPECIAL, strpos($doc, 'b')],
        ['caught', 'c', \DOKU_LEXER_MATCHED, strpos($doc, 'c')],
        ['caught', 'd', \DOKU_LEXER_UNMATCHED, strpos($doc, 'd')],
        ['caught', '</file>', \DOKU_LEXER_EXIT, strpos($doc, '</file>')],
    ], $caught);
}
/**
 * Token offsets stay correct when the exit pattern starts with a positive
 * lookbehind.
 *
 * `(?<=d)` lets the closing delimiter match only when it is preceded by "d".
 * The "d" must still be reported as its own UNMATCHED token, and the EXIT
 * token is expected to carry just "</file>" at the delimiter's own offset.
 */
function testIndexLookbehindEqual()
{
    $doc = "aaa<file>bcd</file>eee";
    $handler = new RecordingHandler();
    $lexer = new Lexer($handler, "ignore");
    $lexer->addEntryPattern('<file>', "ignore", "caught");
    $lexer->addExitPattern("(?<=d)</file>", "caught");
    $lexer->addSpecialPattern('b', 'caught', 'special');
    // Route the special mode to the same handler name as the enclosing mode.
    $lexer->mapHandler('special', 'caught');
    $lexer->addPattern('c', 'caught');
    $this->assertTrue($lexer->parse($doc));
    // array_filter() preserves keys; reindex so the list compares with assertSame().
    $caught = array_values(array_filter($handler->recorded, fn($c) => $c[0] === 'caught'));
    $this->assertSame([
        ['caught', '<file>', \DOKU_LEXER_ENTER, strpos($doc, '<file>')],
        ['caught', 'b', \DOKU_LEXER_SPECIAL, strpos($doc, 'b')],
        ['caught', 'c', \DOKU_LEXER_MATCHED, strpos($doc, 'c')],
        ['caught', 'd', \DOKU_LEXER_UNMATCHED, strpos($doc, 'd')],
        ['caught', '</file>', \DOKU_LEXER_EXIT, strpos($doc, '</file>')],
    ], $caught);
}
/**
 * Token offsets stay correct when the exit pattern starts with a negative
 * lookbehind.
 *
 * `(?<!c)` lets the closing delimiter match unless it is preceded by "c".
 * In this document it is preceded by "d", so the exit fires and the EXIT
 * token is expected to carry just "</file>" at the delimiter's own offset.
 */
function testIndexLookbehindNotEqual()
{
    $doc = "aaa<file>bcd</file>eee";
    $handler = new RecordingHandler();
    $lexer = new Lexer($handler, 'ignore');
    $lexer->addEntryPattern('<file>', 'ignore', 'caught');
    $lexer->addExitPattern('(?<!c)</file>', 'caught');
    $lexer->addSpecialPattern('b', 'caught', 'special');
    // Route the special mode to the same handler name as the enclosing mode.
    $lexer->mapHandler('special', 'caught');
    $lexer->addPattern('c', 'caught');
    $this->assertTrue($lexer->parse($doc));
    // array_filter() preserves keys; reindex so the list compares with assertSame().
    $caught = array_values(array_filter($handler->recorded, fn($c) => $c[0] === 'caught'));
    $this->assertSame([
        ['caught', '<file>', \DOKU_LEXER_ENTER, strpos($doc, '<file>')],
        ['caught', 'b', \DOKU_LEXER_SPECIAL, strpos($doc, 'b')],
        ['caught', 'c', \DOKU_LEXER_MATCHED, strpos($doc, 'c')],
        ['caught', 'd', \DOKU_LEXER_UNMATCHED, strpos($doc, 'd')],
        ['caught', '</file>', \DOKU_LEXER_EXIT, strpos($doc, '</file>')],
    ], $caught);
}
/**
* This test is primarily to ensure the correct match is chosen
* when there are non-captured elements in the pattern.
*/
function testIndexSelectCorrectMatch()
{
    $doc = 'ALL FOOLS ARE FOO';
    $pattern = '\bFOO\b';

    $recorder = new RecordingHandler();
    $subject = new Lexer($recorder, 'ignore');
    $subject->addSpecialPattern($pattern, 'ignore', 'caught');
    $this->assertTrue($subject->parse($doc));

    // Keep only the calls delivered to "caught", reindexed from zero.
    $isCaught = fn($call) => $call[0] === 'caught';
    $caught = array_values(array_filter($recorder->recorded, $isCaught));

    // Reference offset: where plain PCRE finds the same pattern. The word
    // boundaries skip the "FOO" inside "FOOLS".
    $reference = [];
    preg_match("/{$pattern}/", $doc, $reference, PREG_OFFSET_CAPTURE);
    $referenceOffset = $reference[0][1];

    $this->assertCount(1, $caught);
    $token = $caught[0];
    $this->assertSame('FOO', $token[1]);
    $this->assertSame(\DOKU_LEXER_SPECIAL, $token[2]);
    $this->assertSame($referenceOffset, $token[3]);
}
}