xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmEscapeTest.php (revision b73ece99c18919754d993a1d1f5cb27140555705)
1<?php
2
3namespace dokuwiki\test\Parsing\ParserMode;
4
5use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
6use dokuwiki\Parsing\ParserMode\GfmEmphasis;
7use dokuwiki\Parsing\ParserMode\GfmEscape;
8use dokuwiki\Parsing\ParserMode\GfmHeader;
9use dokuwiki\Parsing\ParserMode\Linebreak;
10
/**
 * Tests for the GFM backslash-escape mode (GfmEscape).
 *
 * Every test feeds a small snippet to the parser provided by ParserTestBase
 * ($this->P) and then inspects the instructions recorded by the handler
 * ($this->H->calls). Each recorded call is read as [name, args, ...]; the
 * first argument of a `cdata` or `unformatted` call is its text payload.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Select the `md` syntax for every test. The DW-loaded tests further
     * down override this per data set via setSyntax().
     */
    public function setUp(): void
    {
        parent::setUp();
        $this->setSyntax('md');
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     *
     * @param string $char a single ASCII punctuation character
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->cdataText(),
            "Escaped {$char} must collapse to the literal char in cdata stream"
        );
    }

    /**
     * One data set per ASCII punctuation character, keyed `char_<hex>` so a
     * failing set is identifiable even for characters that are awkward to
     * print in a test name.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     *
     * @param string $tail the character placed directly after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->cdataText());
    }

    /**
     * Characters that must NOT be treated as escapable.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit'        => ['3'],
            'multibyte'    => ['α'],
            'space'        => [' '],
            'tab'          => ["\t"],
        ];
    }

    /**
     * `\\` is the escaped-backslash form: the first backslash is consumed as
     * the escape introducer, the second is emitted as a literal backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->cdataText());
    }

    /**
     * GFM spec example 310 fragment. `\*` must consume the asterisk before
     * GfmEmphasis can use it as an opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $this->assertNotContains(
            'emphasis_open',
            $this->callNames(),
            'Escaped opener must not start emphasis'
        );
    }

    /**
     * GFM spec example 311. `\\` collapses to a literal backslash, and the
     * `*emphasis*` that follows is seen by GfmEmphasis with its full text
     * intact.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $this->assertContains(
            'emphasis_open',
            $this->callNames(),
            'After \\\\ collapses, the surviving *emphasis* must open emphasis'
        );
    }

    /**
     * `\#` must defeat GfmHeader's column-0 `#` match. The trailing text
     * becomes a normal paragraph instead.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $this->assertNotContains(
            'header',
            $this->callNames(),
            'Escaped # must not produce a header'
        );
    }

    /**
     * GFM spec example 313. The whole backtick span is captured by
     * GfmBacktickSingle in one regex shot, so GfmEscape never runs on its
     * body. The body must retain the literal backslashes.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        // Collect the text payload of every `unformatted` call.
        $bodies = [];
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'unformatted') {
                $bodies[] = $call[1][0];
            }
        }

        $this->assertContains(
            '\\[\\',
            $bodies,
            'Backtick span body must preserve the literal backslashes'
        );
    }

    /**
     * Pins the mode's sort value; the DW-loaded tests below rely on
     * GfmEscape being tried before Linebreak.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }

    /**
     * In pure `md` mode, `\\` before a newline still escapes to a literal
     * backslash per GFM §6.1 — no DW Linebreak is loaded to defer to.
     */
    public function testDoubleBackslashBeforeNewlineEscapesInPureMd(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse("foo \\\\\nbar");

        $this->assertNotContains(
            'linebreak',
            $this->callNames(),
            'No DW Linebreak is loaded in pure md mode — `\\\\\\n` must stay an escape'
        );

        $this->assertSame(
            "\nfoo \\\nbar",
            $this->cdataText(),
            '`\\\\` collapses to a literal backslash; the newline survives as cdata'
        );
    }

    /**
     * In any DW-loaded mode (`dw+md` / `md+dw`), `\\` before a space, tab,
     * or newline must defer to DW's Linebreak mode. GfmEscape would
     * otherwise consume those two bytes first (sort 5 vs Linebreak's 140)
     * and the forced linebreak would never fire.
     *
     * This test covers the newline form; the space form has its own test
     * below. The tab form is not exercised in this class.
     *
     * @dataProvider provideDwLoadedSyntaxes
     *
     * @param string $syntax syntax identifier passed to setSyntax()
     */
    public function testDoubleBackslashBeforeNewlineDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        $this->setSyntax($syntax);

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse("foo\\\\\nbar");

        // The message must print the input as typed: two backslashes and \n.
        $this->assertContains(
            'linebreak',
            $this->callNames(),
            sprintf('Under %s, `\\\\\\n` must yield a DW linebreak instead of an escape', $syntax)
        );
    }

    /**
     * Same deferral applies for `\\` before a literal space — the
     * canonical DW forced-linebreak form.
     *
     * @dataProvider provideDwLoadedSyntaxes
     *
     * @param string $syntax syntax identifier passed to setSyntax()
     */
    public function testDoubleBackslashBeforeSpaceDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        $this->setSyntax($syntax);

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('foo \\\\ bar');

        // The message must print the input as typed: two backslashes and a space.
        $this->assertContains(
            'linebreak',
            $this->callNames(),
            sprintf('Under %s, `\\\\ ` must yield a DW linebreak instead of an escape', $syntax)
        );
    }

    /**
     * The deferral is narrow: `\\` followed by non-whitespace still
     * escapes to a literal backslash, even with DW Linebreak loaded.
     *
     * The input is a UNC-style path typed with every backslash escaped
     * (four backslashes, `host`, two backslashes, `share`); each pair must
     * collapse to one literal backslash and no linebreak may be emitted.
     *
     * @dataProvider provideDwLoadedSyntaxes
     *
     * @param string $syntax syntax identifier passed to setSyntax()
     */
    public function testMidLineDoubleBackslashStillEscapesWhenDwLoaded(string $syntax): void
    {
        $this->setSyntax($syntax);

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('\\\\\\\\host\\\\share');

        $this->assertNotContains(
            'linebreak',
            $this->callNames(),
            'Mid-line `\\\\` (no EOL whitespace) must not fire a linebreak'
        );

        $this->assertSame(
            "\n\\\\host\\share",
            $this->cdataText(),
            'Each `\\\\` collapses to a single literal backslash, GFM-style'
        );
    }

    /**
     * Syntax identifiers for which the tests also register DW's Linebreak.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideDwLoadedSyntaxes(): array
    {
        return [
            'dw_md' => ['dw+md'],
            'md_dw' => ['md+dw'],
        ];
    }

    /**
     * Concatenate the text payload of every recorded `cdata` call, in the
     * order the calls were recorded.
     *
     * @return string
     */
    private function cdataText(): string
    {
        $text = '';
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'cdata') {
                $text .= $call[1][0];
            }
        }
        return $text;
    }

    /**
     * Names (element 0) of all recorded handler calls, in order.
     *
     * @return array
     */
    private function callNames(): array
    {
        return array_column($this->H->calls, 0);
    }
}
247