xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmEscapeTest.php (revision eb15e634e1400f6c4d78f5fb40179ca25f41574d)
1<?php
2
3namespace dokuwiki\test\Parsing\ParserMode;
4
5use dokuwiki\Parsing\ModeRegistry;
6use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
7use dokuwiki\Parsing\ParserMode\GfmEmphasis;
8use dokuwiki\Parsing\ParserMode\GfmEscape;
9use dokuwiki\Parsing\ParserMode\GfmHeader;
10
/**
 * Tests for the GFM backslash-escape mode.
 *
 * Every test registers GfmEscape (plus, where relevant, a mode it competes
 * with) on the parser held in $this->P, parses a short snippet and then
 * inspects the instruction list recorded in $this->H->calls.
 *
 * NOTE(review): $this->P and $this->H are not declared in this class;
 * presumably ParserTestBase::setUp() creates them — confirm there.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Switch the global syntax setting to 'md' and reset the mode registry
     * so each test starts from a clean state.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $conf['syntax'] = 'md';
        ModeRegistry::reset();
    }

    /**
     * Reset the mode registry again before handing over to the parent
     * teardown, so registry state built during a test does not outlive it.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     *
     * @param string $char a single ASCII punctuation character
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->joinCdataCalls(),
            "Escaped {$char} must collapse to the literal char in cdata stream"
        );
    }

    /**
     * One dataset per ASCII punctuation character.
     *
     * Dataset names are 'char_' followed by the character's hex code, so a
     * failing case can be identified even when the character itself is
     * awkward to print.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     *
     * @param string $tail the character placed directly after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->joinCdataCalls());
    }

    /**
     * Representative characters that must not be treated as escapable.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit'        => ['3'],
            'multibyte'    => ['α'],
            'space'        => [' '],
            'tab'          => ["\t"],
        ];
    }

    /**
     * An escaped backslash must come out as exactly one literal backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        // \\ is the escaped-backslash form. The first char in the match
        // is consumed as the escape introducer; the second is emitted as
        // a literal backslash.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->joinCdataCalls());
    }

    /**
     * An escaped asterisk must not be usable as an emphasis opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        // GFM spec example 310 fragment. \* must consume the asterisk
        // before GfmEmphasis can use it as an opener.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'emphasis_open',
            $modes,
            'Escaped opener must not start emphasis'
        );
    }

    /**
     * An escaped backslash directly before *emphasis* must leave the
     * emphasis intact.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        // GFM spec example 311. \\ collapses to a literal backslash, and
        // the *emphasis* that follows is now seen by GfmEmphasis with
        // its full text intact.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $modes = array_column($this->H->calls, 0);
        $this->assertContains(
            'emphasis_open',
            $modes,
            'After \\\\ collapses, the surviving *emphasis* must open emphasis'
        );
    }

    /**
     * An escaped hash at the start of the input must not become a header.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        // \# must defeat GfmHeader's column-0 # match. The trailing text
        // becomes a normal paragraph instead.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'header',
            $modes,
            'Escaped # must not produce a header'
        );
    }

    /**
     * Backslashes inside a backtick span must be kept verbatim.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        // GFM spec example 313. The whole `\[\`` is captured by
        // GfmBacktickSingle in one regex shot, so GfmEscape never runs
        // on its body. The body must retain the literal backslashes.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        $unformatted = array_filter($this->H->calls, static fn($c) => $c[0] === 'unformatted');
        $bodies = array_map(static fn($c) => $c[1][0], $unformatted);
        $this->assertContains(
            '\\[\\',
            $bodies,
            'Backtick span body must preserve the literal backslashes'
        );
    }

    /**
     * Pin the mode's sort value at 5.
     *
     * NOTE(review): presumably this keeps GfmEscape ahead of the modes it
     * must pre-empt — confirm against the other modes' sort values.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }

    /**
     * Concatenate the first argument of every recorded 'cdata' call, in
     * the order the calls were recorded.
     *
     * Shared by the tests that assert on the literal text stream, so the
     * rule for collecting that stream is defined once.
     *
     * @return string the joined cdata payloads ('' when there are none)
     */
    private function joinCdataCalls(): string
    {
        $joined = '';
        foreach ($this->H->calls as $call) {
            // Each call is [name, args, ...]; only cdata carries plain text.
            if ($call[0] === 'cdata') {
                $joined .= $call[1][0];
            }
        }
        return $joined;
    }
}
161