xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmCodeTest.php (revision 2e43b79909f3bc04928779d886f68c1242b5d436)
1<?php
2
3namespace dokuwiki\test\Parsing\ParserMode;
4
5use dokuwiki\Parsing\ParserMode\Eol;
6use dokuwiki\Parsing\ParserMode\GfmCode;
7
/**
 * Tests for GFM backtick-fenced code blocks (`GfmCode`).
 *
 * Every test feeds a small Markdown snippet to the parser provided by
 * ParserTestBase and inspects the calls recorded by the handler. A
 * recorded call is indexed numerically: element 0 is the instruction name
 * and element 1 is the instruction's argument list. For a `code`
 * instruction that argument list is addressed through the ARG_* constants
 * below.
 */
class GfmCodeTest extends ParserTestBase
{
    /** Index of the code body inside a `code` call's argument list. */
    private const ARG_BODY = 0;

    /** Index of the language inside a `code` call's argument list. */
    private const ARG_LANGUAGE = 1;

    /** Index of the filename inside a `code` call's argument list. */
    private const ARG_FILENAME = 2;

    /** Index of the highlight options inside a `code` call's argument list. */
    private const ARG_OPTIONS = 3;

    public function setUp(): void
    {
        parent::setUp();
        $this->setSyntax('md');
    }

    /**
     * Register the mode under test plus Eol.
     *
     * Order matters: the ParallelRegex alternates patterns in insertion
     * order and leftmost-match picks the first alternative, so the block
     * mode must be added before Eol (same effect ModeRegistry achieves in
     * production via sort values).
     */
    private function addModes(): void
    {
        $this->P->addMode('gfm_code', new GfmCode());
        $this->P->addMode('eol', new Eol());
    }

    /**
     * Parse $text and return every recorded `code` call.
     *
     * @param string $text Markdown source handed to the parser
     * @return array[] re-indexed list of handler calls whose name is `code`
     */
    private function parseCodeCalls(string $text): array
    {
        $this->addModes();
        $this->P->parse($text);

        // array_filter keeps the original keys; re-index so [0] is the first match.
        return array_values(array_filter(
            $this->H->calls,
            static fn($call) => $call[0] === 'code'
        ));
    }

    /**
     * Parse $text, assert that exactly one `code` call was recorded and
     * return that call's argument list (indexable with the ARG_* constants).
     *
     * @param string $text Markdown source handed to the parser
     * @return array argument list of the single `code` call
     */
    private function parseSingleCode(string $text): array
    {
        $codeCalls = $this->parseCodeCalls($text);
        $this->assertCount(1, $codeCalls);

        return $codeCalls[0][1];
    }

    /**
     * Parse $text and return the names of all recorded calls, in order.
     *
     * @param string $text Markdown source handed to the parser
     * @return array list of instruction names
     */
    private function parseCallNames(string $text): array
    {
        $this->addModes();
        $this->P->parse($text);

        return array_column($this->H->calls, 0);
    }

    public function testBasicBacktickFence(): void
    {
        $args = $this->parseSingleCode("```\nhello\n```");

        $this->assertSame("hello\n", $args[self::ARG_BODY]);
        $this->assertNull($args[self::ARG_LANGUAGE]);
        $this->assertNull($args[self::ARG_FILENAME]);
    }

    public function testLanguageFromInfoString(): void
    {
        $args = $this->parseSingleCode("```ruby\nx\n```");

        $this->assertSame("x\n", $args[self::ARG_BODY]);
        $this->assertSame('ruby', $args[self::ARG_LANGUAGE]);
    }

    public function testLanguageIsFirstWord(): void
    {
        // GFM spec example 113: only the first token of the info string
        // is treated as a language; extra junk is dropped.
        $args = $this->parseSingleCode("```ruby startline=3 \$%@#\$\nx\n```");

        $this->assertSame('ruby', $args[self::ARG_LANGUAGE]);
    }

    public function testBacktickInfoRejectsBackticks(): void
    {
        // GFM spec example 115: a backtick run with backticks in its
        // info string is NOT a fence — stays for inline code parsing.
        $names = $this->parseCallNames("``` aa ```\nfoo");

        $this->assertNotContains('code', $names,
            'Backtick fence must reject backticks in info string');
    }

    public function testLongerCloseFenceIsValid(): void
    {
        // Opener 3, closer 5 — valid because closer is ≥ opener.
        $args = $this->parseSingleCode("```\naaa\n`````");

        $this->assertSame("aaa\n", $args[self::ARG_BODY]);
    }

    public function testIndentedFenceIsNotFence(): void
    {
        // Column-0-only policy: any leading space rejects the fence.
        $names = $this->parseCallNames(" ```\nx\n ```");

        $this->assertNotContains('code', $names,
            'Fence must start at column 0; indent is out of scope');
    }

    public function testUnclosedFenceStaysLiteral(): void
    {
        // An unclosed fence must not emit a code call — the ``` stays as
        // paragraph text. Diverges from strict GFM (which would consume
        // to EOF); see class docblock for the rationale.
        $names = $this->parseCallNames("```\nabc\ndef");

        $this->assertNotContains('code', $names,
            'Unclosed fences must stay literal, not emit code');
    }

    public function testEmptyBody(): void
    {
        $args = $this->parseSingleCode("```\n```");

        $this->assertSame('', $args[self::ARG_BODY]);
    }

    public function testCloseWithTrailingSpaces(): void
    {
        $args = $this->parseSingleCode("```\nx\n```   ");

        $this->assertSame("x\n", $args[self::ARG_BODY]);
    }

    public function testCloseWithTrailingTabs(): void
    {
        $args = $this->parseSingleCode("```\nx\n```\t\t");

        $this->assertSame("x\n", $args[self::ARG_BODY]);
    }

    public function testFenceInterruptsParagraph(): void
    {
        // GFM spec example 110: a fence doesn't need a blank line before
        // it; the `code` instruction is block-level and paragraphs break.
        $args = $this->parseSingleCode("foo\n```\nbar\n```\nbaz");

        $this->assertSame("bar\n", $args[self::ARG_BODY]);
    }

    public function testEmptyInfoStringMeansNullLanguage(): void
    {
        $args = $this->parseSingleCode("```\nx\n```");

        $this->assertNull($args[self::ARG_LANGUAGE]);
    }

    public function testInfoStringSpecialChar(): void
    {
        // GFM spec example 114: a semicolon is a valid language token.
        $args = $this->parseSingleCode("```;\n```");

        $this->assertSame(';', $args[self::ARG_LANGUAGE]);
    }

    public function testTildeLineDoesNotCloseBacktickFence(): void
    {
        // The `~~~` line must end up inside the body instead of closing
        // the backtick-opened block.
        $args = $this->parseSingleCode("```\naaa\n~~~\nbbb\n```");

        $this->assertSame("aaa\n~~~\nbbb\n", $args[self::ARG_BODY]);
    }

    public function testFilenameAfterLanguage(): void
    {
        // DokuWiki's Code mode treats the second whitespace token as
        // the filename (turns the block into a download link). GfmCode
        // accepts the same vocabulary on the info string.
        $args = $this->parseSingleCode("```php myfile.php\n<?php\n```");

        $this->assertSame('php', $args[self::ARG_LANGUAGE]);
        $this->assertSame('myfile.php', $args[self::ARG_FILENAME]);
    }

    public function testHtmlAliasedToHtml4Strict(): void
    {
        // Same GeSHi alias DokuWiki's Code mode applies.
        $args = $this->parseSingleCode("```html\n<p>\n```");

        $this->assertSame('html4strict', $args[self::ARG_LANGUAGE]);
    }

    public function testDashMeansNoLanguage(): void
    {
        // DokuWiki uses `-` as an explicit "no language" marker; lets
        // a filename follow without a language argument first.
        $args = $this->parseSingleCode("```- somefile.txt\nx\n```");

        $this->assertNull($args[self::ARG_LANGUAGE]);
        $this->assertSame('somefile.txt', $args[self::ARG_FILENAME]);
    }

    public function testHighlightOptions(): void
    {
        // DokuWiki uses space-separated keys inside `[...]`. A bare key is
        // expected to become `true` and a quoted numeric value an integer;
        // with no filename token the filename slot stays null.
        $args = $this->parseSingleCode(
            "```php [enable_line_numbers start_line_numbers_at=\"10\"]\nx\n```"
        );

        $this->assertSame('php', $args[self::ARG_LANGUAGE]);
        $this->assertNull($args[self::ARG_FILENAME]);
        // Options are only appended as a fourth argument when present.
        $this->assertCount(4, $args);
        $this->assertSame(
            ['enable_line_numbers' => true, 'start_line_numbers_at' => 10],
            $args[self::ARG_OPTIONS]
        );
    }

    public function testFilenameAndOptions(): void
    {
        $args = $this->parseSingleCode("```php myfile.php [enable_line_numbers]\nx\n```");

        $this->assertSame('php', $args[self::ARG_LANGUAGE]);
        $this->assertSame('myfile.php', $args[self::ARG_FILENAME]);
        $this->assertSame(['enable_line_numbers' => true], $args[self::ARG_OPTIONS]);
    }

    public function testInfoStringBackslashEscapeIsResolved(): void
    {
        // GFM §6.1 (spec example 322): backslash-escaped punctuation in
        // the info string is unescaped before parseAttributes splits it
        // into language / filename / options.
        $args = $this->parseSingleCode("```c\\#\nx\n```");

        $this->assertSame('c#', $args[self::ARG_LANGUAGE]);
    }

    public function testCodeBodyKeepsBackslashEscapes(): void
    {
        // The body of a fenced code block is captured verbatim — escapes
        // inside it must NOT collapse (spec: escapes don't fire in code
        // blocks). Only the info string is touched by Escape::unescape.
        $args = $this->parseSingleCode("```\nfoo \\* bar\n```");

        $this->assertSame("foo \\* bar\n", $args[self::ARG_BODY]);
    }

    public function testSortValue(): void
    {
        $this->assertSame(200, (new GfmCode())->getSort());
    }
}
318