xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmEscapeTest.php (revision 65dd2042806255b56d4cf303530fd396ff38f151)
1<?php
2
3namespace dokuwiki\test\Parsing\ParserMode;
4
5use dokuwiki\Parsing\ModeRegistry;
6use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
7use dokuwiki\Parsing\ParserMode\GfmEmphasis;
8use dokuwiki\Parsing\ParserMode\GfmEscape;
9use dokuwiki\Parsing\ParserMode\GfmHeader;
10use dokuwiki\Parsing\ParserMode\Linebreak;
11
/**
 * Tests for the GFM backslash-escape mode.
 *
 * Each test registers only the parser modes it needs on `$this->P`, parses a
 * small snippet and then inspects the calls collected in `$this->H->calls`.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Switch the configured syntax to pure Markdown and start from a fresh
     * mode registry before every test.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $conf['syntax'] = 'md';
        ModeRegistry::reset();
    }

    /**
     * Reset the mode registry again so later tests do not see leftovers.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Collect the first argument of every recorded call with the given name.
     *
     * A recorded call is read as `[name, [firstArgument, ...], ...]`.
     *
     * @param string $name call name to look for, e.g. 'cdata'
     * @return array first arguments of the matching calls, in call order
     */
    private function firstArgsOf(string $name): array
    {
        $args = [];
        foreach ($this->H->calls as $call) {
            if ($call[0] === $name) {
                $args[] = $call[1][0];
            }
        }
        return $args;
    }

    /**
     * Concatenate the text of all recorded `cdata` calls in call order.
     *
     * @return string the joined cdata text
     */
    private function cdataStream(): string
    {
        return implode('', $this->firstArgsOf('cdata'));
    }

    /**
     * List the name of every recorded call.
     *
     * @return array call names in call order
     */
    private function instructionNames(): array
    {
        return array_column($this->H->calls, 0);
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->cdataStream(),
            "Escaped {$char} must collapse to the literal char in cdata stream"
        );
    }

    /**
     * One data set per ASCII punctuation character, keyed by `char_` plus
     * the character's hex code so that every data set name is unique.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $sets = [];
        foreach (str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~') as $char) {
            $sets['char_' . bin2hex($char)] = [$char];
        }
        return $sets;
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->cdataStream());
    }

    /**
     * Characters that must not be treated as escapable after a backslash.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit'        => ['3'],
            'multibyte'    => ['α'],
            'space'        => [' '],
            'tab'          => ["\t"],
        ];
    }

    /**
     * Two backslashes in a row are the escaped-backslash form.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        // The first backslash of the match is consumed as the escape
        // introducer; the second is emitted as a literal backslash.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->cdataStream());
    }

    /**
     * GFM spec example 310 fragment: an escaped asterisk is not an opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        // The escape must consume the asterisk before GfmEmphasis can use
        // it as an opener.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $this->assertNotContains(
            'emphasis_open',
            $this->instructionNames(),
            'Escaped opener must not start emphasis'
        );
    }

    /**
     * GFM spec example 311: an escaped backslash does not protect the
     * emphasis that follows it.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        // The double backslash collapses to a literal backslash, and the
        // *emphasis* that follows is seen by GfmEmphasis with its full
        // text intact.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $this->assertContains(
            'emphasis_open',
            $this->instructionNames(),
            'After \\\\ collapses, the surviving *emphasis* must open emphasis'
        );
    }

    /**
     * An escaped hash at the start of a line must not become a header.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        // The escape must defeat GfmHeader's column-0 # match. The trailing
        // text becomes a normal paragraph instead.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $this->assertNotContains(
            'header',
            $this->instructionNames(),
            'Escaped # must not produce a header'
        );
    }

    /**
     * GFM spec example 313: backslashes inside a backtick span stay literal.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        // The whole backtick span is captured by GfmBacktickSingle in one
        // regex shot, so GfmEscape never runs on its body. The body must
        // retain the literal backslashes.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        $this->assertContains(
            '\\[\\',
            $this->firstArgsOf('unformatted'),
            'Backtick span body must preserve the literal backslashes'
        );
    }

    /**
     * The mode reports sort value 5.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }

    /**
     * In pure `md` mode, `\\` before a newline still escapes to a literal
     * backslash per GFM §6.1 — no DW Linebreak is loaded to defer to.
     */
    public function testDoubleBackslashBeforeNewlineEscapesInPureMd(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse("foo \\\\\nbar");

        $this->assertNotContains(
            'linebreak',
            $this->instructionNames(),
            'No DW Linebreak is loaded in pure md mode — `\\\\\\n` must stay an escape'
        );

        $this->assertSame(
            "\nfoo \\\nbar",
            $this->cdataStream(),
            '`\\\\` collapses to a literal backslash; the newline survives as cdata'
        );
    }

    /**
     * In any DW-loaded mode (`dw+md` / `md+dw`), `\\` before a space, tab,
     * or newline must defer to DW's Linebreak mode. GfmEscape would
     * otherwise consume those two bytes first (sort 5 vs Linebreak's 140)
     * and the forced linebreak would never fire.
     *
     * @dataProvider provideDwLoadedSyntaxes
     */
    public function testDoubleBackslashBeforeNewlineDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        global $conf;
        $conf['syntax'] = $syntax;

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse("foo\\\\\nbar");

        // The message shows the input as two backslashes followed by "\n",
        // the same notation the pure-md test uses for this byte sequence.
        $this->assertContains(
            'linebreak',
            $this->instructionNames(),
            "Under {$syntax}, `\\\\\\n` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * Same deferral applies for `\\` before a literal space — the
     * canonical DW forced-linebreak form.
     *
     * @dataProvider provideDwLoadedSyntaxes
     */
    public function testDoubleBackslashBeforeSpaceDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        global $conf;
        $conf['syntax'] = $syntax;

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('foo \\\\ bar');

        // The message shows the input as it was parsed: two backslashes
        // followed by a space.
        $this->assertContains(
            'linebreak',
            $this->instructionNames(),
            "Under {$syntax}, `\\\\ ` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * The deferral is narrow: `\\` followed by non-whitespace still
     * escapes to a literal backslash, even with DW Linebreak loaded.
     * Otherwise a user who typed GFM escapes back-to-back, as in the
     * UNC-style path `\\\\host\\share`, would be surprised to get the
     * doubled backslashes back literally.
     *
     * @dataProvider provideDwLoadedSyntaxes
     */
    public function testMidLineDoubleBackslashStillEscapesWhenDwLoaded(string $syntax): void
    {
        global $conf;
        $conf['syntax'] = $syntax;

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('\\\\\\\\host\\\\share');

        $this->assertNotContains(
            'linebreak',
            $this->instructionNames(),
            'Mid-line `\\\\` (no EOL whitespace) must not fire a linebreak'
        );

        $this->assertSame(
            "\n\\\\host\\share",
            $this->cdataStream(),
            'Each `\\\\` collapses to a single literal backslash, GFM-style'
        );
    }

    /**
     * Syntax settings that load DokuWiki syntax next to Markdown.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideDwLoadedSyntaxes(): array
    {
        return [
            'dw_md' => ['dw+md'],
            'md_dw' => ['md+dw'],
        ];
    }
}
259