xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmEscapeTest.php (revision 74031e463764923581b9204cebc0fc3f34ce881f)
1*74031e46SAndreas Gohr<?php
2*74031e46SAndreas Gohr
3*74031e46SAndreas Gohrnamespace dokuwiki\test\Parsing\ParserMode;
4*74031e46SAndreas Gohr
5*74031e46SAndreas Gohruse dokuwiki\Parsing\ModeRegistry;
6*74031e46SAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
7*74031e46SAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmEmphasis;
8*74031e46SAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmEscape;
9*74031e46SAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmHeader;
10*74031e46SAndreas Gohr
11*74031e46SAndreas Gohr/**
12*74031e46SAndreas Gohr * Tests for the GFM backslash-escape mode.
13*74031e46SAndreas Gohr */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Select the markdown syntax in the global configuration and reset the
     * mode registry before every test.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $conf['syntax'] = 'markdown';
        ModeRegistry::reset();
    }

    /**
     * Reset the mode registry again once a test has finished.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Concatenate the first argument of every recorded 'cdata' handler call,
     * in call order.
     *
     * Shared by the tests that compare the complete literal text emitted by
     * the parser against an expected string.
     *
     * @return string the joined cdata payloads, '' when no cdata call was recorded
     */
    private function joinCdataCalls(): string
    {
        $text = '';
        foreach ($this->H->calls as $call) {
            // each call is [name, args, ...]; only plain character data is of interest
            if ($call[0] === 'cdata') {
                $text .= $call[1][0];
            }
        }
        return $text;
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     * @param string $char a single ASCII punctuation character
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->joinCdataCalls(),
            "Escaped {$char} must collapse to the literal char in cdata stream"
        );
    }

    /**
     * One data set per ASCII punctuation character, keyed by 'char_' followed
     * by the hex code of the character so that failures are easy to identify.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     * @param string $tail the character placed directly after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->joinCdataCalls());
    }

    /**
     * Characters that must not be treated as escapable after a backslash.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit'        => ['3'],
            'multibyte'    => ['α'],
            'space'        => [' '],
            'tab'          => ["\t"],
        ];
    }

    /**
     * An escaped backslash yields exactly one literal backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        // \\ is the escaped-backslash form. The first char in the match
        // is consumed as the escape introducer; the second is emitted as
        // a literal backslash.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->joinCdataCalls());
    }

    /**
     * An escaped asterisk must not be usable as an emphasis opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        // GFM spec example 310 fragment. \* must consume the asterisk
        // before GfmEmphasis can use it as an opener.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'emphasis_open',
            $modes,
            'Escaped opener must not start emphasis'
        );
    }

    /**
     * An escaped backslash directly before emphasis must leave the emphasis intact.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        // GFM spec example 311. \\ collapses to a literal backslash, and
        // the *emphasis* that follows is now seen by GfmEmphasis with
        // its full text intact.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $modes = array_column($this->H->calls, 0);
        $this->assertContains(
            'emphasis_open',
            $modes,
            'After \\\\ collapses, the surviving *emphasis* must open emphasis'
        );
    }

    /**
     * An escaped hash at the start of a line must not produce a header call.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        // \# must defeat GfmHeader's column-0 # match. The trailing text
        // becomes a normal paragraph instead.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'header',
            $modes,
            'Escaped # must not produce a header'
        );
    }

    /**
     * Backslashes inside a backtick span must reach the 'unformatted' call untouched.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        // GFM spec example 313. The whole `\[\`` is captured by
        // GfmBacktickSingle in one regex shot, so GfmEscape never runs
        // on its body. The body must retain the literal backslashes.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        $unformatted = array_filter($this->H->calls, static fn($c) => $c[0] === 'unformatted');
        $bodies = array_map(static fn($c) => $c[1][0], $unformatted);
        $this->assertContains(
            '\\[\\',
            $bodies,
            'Backtick span body must preserve the literal backslashes'
        );
    }

    /**
     * The mode must report the sort value 5.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }
}
161