xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmCodeTest.php (revision b414dba2b10d2f550b453d752c86bb62343bec93)
1<?php
2
3namespace dokuwiki\test\Parsing\ParserMode;
4
5use dokuwiki\Parsing\ModeRegistry;
6use dokuwiki\Parsing\ParserMode\Eol;
7use dokuwiki\Parsing\ParserMode\GfmCode;
8
/**
 * Tests for GFM backtick-fenced code blocks (`GfmCode`).
 *
 * Every test registers only the `gfm_code` and `eol` modes, parses a small
 * Markdown snippet and inspects the handler calls named `code`. The
 * assertions rely on this call layout: `$call[0]` is the call name and
 * `$call[1]` the argument list, where index 0 is the block body, 1 the
 * language, 2 the filename and 3 the highlight options.
 */
class GfmCodeTest extends ParserTestBase
{
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        // Run with Markdown syntax selected, then reset the registry
        // (presumably so it does not keep state from another syntax setting).
        $conf['syntax'] = 'md';
        ModeRegistry::reset();
    }

    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Register the mode plus Eol. Order matters: the ParallelRegex
     * alternates patterns in insertion order and leftmost-match picks the
     * first alternative, so the block mode must be added before Eol
     * (same effect ModeRegistry achieves in production via sort values).
     */
    private function addModes(): void
    {
        $this->P->addMode('gfm_code', new GfmCode());
        $this->P->addMode('eol', new Eol());
    }

    /**
     * Parse $markup and return the arguments of the one `code` call it produced.
     *
     * Asserts that exactly one `code` call was emitted, so callers only have
     * to check the individual arguments.
     *
     * @param string $markup Markdown source to parse
     * @return array argument list of the `code` call (body, language, filename, options)
     */
    private function parseSingleCodeCall(string $markup): array
    {
        $this->addModes();
        $this->P->parse($markup);

        // array_filter() preserves keys; reindex so the match sits at offset 0.
        $codeCalls = array_values(array_filter(
            $this->H->calls,
            static fn($call) => $call[0] === 'code'
        ));
        $this->assertCount(1, $codeCalls);

        return $codeCalls[0][1];
    }

    /**
     * Parse $markup and assert that no `code` call was emitted at all.
     *
     * @param string $markup  Markdown source to parse
     * @param string $message failure message explaining the expectation
     */
    private function assertParsesWithoutCode(string $markup, string $message): void
    {
        $this->addModes();
        $this->P->parse($markup);

        $this->assertNotContains('code', array_column($this->H->calls, 0), $message);
    }

    public function testBasicBacktickFence(): void
    {
        $args = $this->parseSingleCodeCall("```\nhello\n```");

        $this->assertSame("hello\n", $args[0]);
        $this->assertNull($args[1]);
        $this->assertNull($args[2]);
    }

    public function testLanguageFromInfoString(): void
    {
        $args = $this->parseSingleCodeCall("```ruby\nx\n```");

        $this->assertSame("x\n", $args[0]);
        $this->assertSame('ruby', $args[1]);
    }

    public function testLanguageIsFirstWord(): void
    {
        // GFM spec example 113: only the first token of the info string
        // is treated as a language; extra junk is dropped.
        $args = $this->parseSingleCodeCall("```ruby startline=3 \$%@#\$\nx\n```");

        $this->assertSame('ruby', $args[1]);
    }

    public function testBacktickInfoRejectsBackticks(): void
    {
        // GFM spec example 115: a backtick run with backticks in its
        // info string is NOT a fence — stays for inline code parsing.
        $this->assertParsesWithoutCode(
            "``` aa ```\nfoo",
            'Backtick fence must reject backticks in info string'
        );
    }

    public function testLongerCloseFenceIsValid(): void
    {
        // Opener 3, closer 5 — valid because closer is ≥ opener.
        $args = $this->parseSingleCodeCall("```\naaa\n`````");

        $this->assertSame("aaa\n", $args[0]);
    }

    public function testIndentedFenceIsNotFence(): void
    {
        // Column-0-only policy: any leading space rejects the fence.
        $this->assertParsesWithoutCode(
            " ```\nx\n ```",
            'Fence must start at column 0; indent is out of scope'
        );
    }

    public function testUnclosedFenceStaysLiteral(): void
    {
        // An unclosed fence must not emit a code call — the ``` stays as
        // paragraph text. Diverges from strict GFM (which would consume
        // to EOF); the rationale is presumably documented on the GfmCode
        // class itself, which is not part of this file.
        $this->assertParsesWithoutCode(
            "```\nabc\ndef",
            'Unclosed fences must stay literal, not emit code'
        );
    }

    public function testEmptyBody(): void
    {
        $args = $this->parseSingleCodeCall("```\n```");

        $this->assertSame('', $args[0]);
    }

    public function testCloseWithTrailingSpaces(): void
    {
        $args = $this->parseSingleCodeCall("```\nx\n```   ");

        $this->assertSame("x\n", $args[0]);
    }

    public function testCloseWithTrailingTabs(): void
    {
        $args = $this->parseSingleCodeCall("```\nx\n```\t\t");

        $this->assertSame("x\n", $args[0]);
    }

    public function testFenceInterruptsParagraph(): void
    {
        // GFM spec example 110: a fence doesn't need a blank line before
        // it; the `code` instruction is block-level and paragraphs break.
        $args = $this->parseSingleCodeCall("foo\n```\nbar\n```\nbaz");

        $this->assertSame("bar\n", $args[0]);
    }

    public function testEmptyInfoStringMeansNullLanguage(): void
    {
        $args = $this->parseSingleCodeCall("```\nx\n```");

        $this->assertNull($args[1]);
    }

    public function testInfoStringSpecialChar(): void
    {
        // GFM spec example 114: a semicolon is a valid language token.
        $args = $this->parseSingleCodeCall("```;\n```");

        $this->assertSame(';', $args[1]);
    }

    public function testTildeLineDoesNotCloseBacktickFence(): void
    {
        // The `~~~` line is kept as part of the body instead of ending the block.
        $args = $this->parseSingleCodeCall("```\naaa\n~~~\nbbb\n```");

        $this->assertSame("aaa\n~~~\nbbb\n", $args[0]);
    }

    public function testFilenameAfterLanguage(): void
    {
        // DokuWiki's Code mode treats the second whitespace token as
        // the filename (turns the block into a download link). GfmCode
        // accepts the same vocabulary on the info string.
        $args = $this->parseSingleCodeCall("```php myfile.php\n<?php\n```");

        $this->assertSame('php', $args[1]);
        $this->assertSame('myfile.php', $args[2]);
    }

    public function testHtmlAliasedToHtml4Strict(): void
    {
        // Same GeSHi alias DokuWiki's Code mode applies.
        $args = $this->parseSingleCodeCall("```html\n<p>\n```");

        $this->assertSame('html4strict', $args[1]);
    }

    public function testDashMeansNoLanguage(): void
    {
        // DokuWiki uses `-` as an explicit "no language" marker; lets
        // a filename follow without a language argument first.
        $args = $this->parseSingleCodeCall("```- somefile.txt\nx\n```");

        $this->assertNull($args[1]);
        $this->assertSame('somefile.txt', $args[2]);
    }

    public function testHighlightOptions(): void
    {
        // DokuWiki uses space-separated keys inside `[...]`; comma
        // separators inside a value survive (as GeSHi line lists) — the
        // comma case is not exercised by this input. Here a bare key is
        // expected as `true` and the quoted "10" as the integer 10.
        $args = $this->parseSingleCodeCall(
            "```php [enable_line_numbers start_line_numbers_at=\"10\"]\nx\n```"
        );

        $this->assertSame('php', $args[1]);
        $this->assertNull($args[2]);
        $this->assertCount(4, $args);
        $this->assertSame(
            ['enable_line_numbers' => true, 'start_line_numbers_at' => 10],
            $args[3]
        );
    }

    public function testFilenameAndOptions(): void
    {
        $args = $this->parseSingleCodeCall("```php myfile.php [enable_line_numbers]\nx\n```");

        $this->assertSame('php', $args[1]);
        $this->assertSame('myfile.php', $args[2]);
        $this->assertSame(['enable_line_numbers' => true], $args[3]);
    }

    public function testInfoStringBackslashEscapeIsResolved(): void
    {
        // GFM §6.1 (spec example 322): backslash-escaped punctuation in
        // the info string is unescaped before parseAttributes splits it
        // into language / filename / options.
        $args = $this->parseSingleCodeCall("```c\\#\nx\n```");

        $this->assertSame('c#', $args[1]);
    }

    public function testCodeBodyKeepsBackslashEscapes(): void
    {
        // The body of a fenced code block is captured verbatim — escapes
        // inside it must NOT collapse (spec: escapes don't fire in code
        // blocks). Only the info string is touched by Escape::unescape.
        $args = $this->parseSingleCodeCall("```\nfoo \\* bar\n```");

        $this->assertSame("foo \\* bar\n", $args[0]);
    }

    public function testSortValue(): void
    {
        $this->assertSame(200, (new GfmCode())->getSort());
    }
}
327