xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmEscapeTest.php (revision 13a62f810fbd091d15ab734b467eaec0a6bf829a)
174031e46SAndreas Gohr<?php
274031e46SAndreas Gohr
374031e46SAndreas Gohrnamespace dokuwiki\test\Parsing\ParserMode;
474031e46SAndreas Gohr
574031e46SAndreas Gohruse dokuwiki\Parsing\ModeRegistry;
674031e46SAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
774031e46SAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmEmphasis;
874031e46SAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmEscape;
974031e46SAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmHeader;
1074031e46SAndreas Gohr
/**
 * Tests for the GFM backslash-escape mode.
 *
 * Each test registers GfmEscape (plus any competing mode it should win or
 * lose against) on the parser provided by the base class and then inspects
 * the instruction calls recorded by the handler.
 */
class GfmEscapeTest extends ParserTestBase
{
    /** @var bool whether $conf['syntax'] existed before setUp() overrode it */
    private bool $hadSyntaxConf = false;

    /** @var mixed value of $conf['syntax'] before setUp() overrode it */
    private $previousSyntaxConf;

    /**
     * Switch the global syntax setting to 'md' for the duration of a test.
     *
     * The previous setting is remembered so tearDown() can put it back.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;

        $this->hadSyntaxConf = is_array($conf) && array_key_exists('syntax', $conf);
        $this->previousSyntaxConf = $this->hadSyntaxConf ? $conf['syntax'] : null;

        $conf['syntax'] = 'md';
        // NOTE(review): presumably the registry caches state derived from
        // $conf['syntax'], hence the reset after changing it - confirm.
        ModeRegistry::reset();
    }

    /**
     * Undo the global changes made in setUp().
     */
    public function tearDown(): void
    {
        global $conf;

        // Restore the syntax setting first so the registry reset below
        // happens with the original configuration in place.
        if ($this->hadSyntaxConf) {
            $conf['syntax'] = $this->previousSyntaxConf;
        } else {
            unset($conf['syntax']);
        }

        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Concatenate the text of every 'cdata' call the handler recorded.
     *
     * @return string first argument of each cdata call, in call order
     */
    private function collectCdataText(): string
    {
        $text = '';
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'cdata') {
                $text .= $call[1][0];
            }
        }
        return $text;
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     * @param string $char the punctuation character placed after the backslash
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->collectCdataText(),
            "Escaped {$char} must collapse to the literal char in cdata stream",
        );
    }

    /**
     * One data set per ASCII punctuation character, keyed 'char_<hex>'.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     * @param string $tail the text placed directly after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->collectCdataText());
    }

    /**
     * Samples of text that must not be treated as escapable.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit'        => ['3'],
            'multibyte'    => ['α'],
            'space'        => [' '],
            'tab'          => ["\t"],
        ];
    }

    /**
     * An escaped backslash must come out as exactly one literal backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        // \\ is the escaped-backslash form. The first char in the match
        // is consumed as the escape introducer; the second is emitted as
        // a literal backslash.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->collectCdataText());
    }

    /**
     * An escaped asterisk must not be usable as an emphasis opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        // GFM spec example 310 fragment. \* must consume the asterisk
        // before GfmEmphasis can use it as an opener.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'emphasis_open',
            $modes,
            'Escaped opener must not start emphasis',
        );
    }

    /**
     * An escaped backslash directly before emphasis must leave the
     * emphasis intact.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        // GFM spec example 311. \\ collapses to a literal backslash, and
        // the *emphasis* that follows is now seen by GfmEmphasis with
        // its full text intact.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $modes = array_column($this->H->calls, 0);
        $this->assertContains(
            'emphasis_open',
            $modes,
            'After \\\\ collapses, the surviving *emphasis* must open emphasis',
        );
    }

    /**
     * An escaped hash at the start of a line must not produce a header.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        // \# must defeat GfmHeader's column-0 # match. The trailing text
        // becomes a normal paragraph instead.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'header',
            $modes,
            'Escaped # must not produce a header',
        );
    }

    /**
     * Backslashes inside a single-backtick code span must stay literal.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        // GFM spec example 313. The whole `\[\`` is captured by
        // GfmBacktickSingle in one regex shot, so GfmEscape never runs
        // on its body. The body must retain the literal backslashes.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        // Collect the first argument of every recorded 'unformatted' call.
        $bodies = [];
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'unformatted') {
                $bodies[] = $call[1][0];
            }
        }

        $this->assertContains(
            '\\[\\',
            $bodies,
            'Backtick span body must preserve the literal backslashes',
        );
    }

    /**
     * The mode must report a sort value of 5.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }
}
161