xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmCodeTest.php (revision 74031e463764923581b9204cebc0fc3f34ce881f)
1b1c59bedSAndreas Gohr<?php
2b1c59bedSAndreas Gohr
3b1c59bedSAndreas Gohrnamespace dokuwiki\test\Parsing\ParserMode;
4b1c59bedSAndreas Gohr
5b1c59bedSAndreas Gohruse dokuwiki\Parsing\ModeRegistry;
6b1c59bedSAndreas Gohruse dokuwiki\Parsing\ParserMode\Eol;
7b1c59bedSAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmCode;
8b1c59bedSAndreas Gohr
/**
 * Tests for GFM backtick-fenced code blocks (`GfmCode`).
 *
 * Each parsing test registers GfmCode plus Eol on the parser, feeds it a
 * small snippet and inspects the `code` calls recorded by the handler.
 * The argument list of such a call is read positionally by the tests:
 * index 0 is the code body, 1 the language, 2 the filename and 3 the
 * highlight options.
 */
class GfmCodeTest extends ParserTestBase
{
    /**
     * Switch the configured syntax to Markdown and reset the mode registry
     * before every test (presumably so the registry does not keep state
     * built for a different syntax setting — confirm against ModeRegistry).
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $conf['syntax'] = 'markdown';
        ModeRegistry::reset();
    }

    /**
     * Reset the mode registry again so later tests start from a clean one.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Register the mode plus Eol. Order matters: the ParallelRegex
     * alternates patterns in insertion order and leftmost-match picks the
     * first alternative, so the block mode must be added before Eol
     * (same effect ModeRegistry achieves in production via sort values).
     */
    private function addModes(): void
    {
        $this->P->addMode('gfm_code', new GfmCode());
        $this->P->addMode('eol', new Eol());
    }

    /**
     * Register the modes, parse the given text and collect the `code` calls.
     *
     * @param string $input wiki text handed to the parser
     * @return array[] handler calls whose instruction name is `code`, reindexed from 0
     */
    private function parseCodeCalls(string $input): array
    {
        $this->addModes();
        $this->P->parse($input);

        return array_values(array_filter(
            $this->H->calls,
            static fn($call) => $call[0] === 'code',
        ));
    }

    /**
     * Parse the given text, assert that exactly one `code` call was emitted
     * and return that call's argument list.
     *
     * @param string $input wiki text handed to the parser
     * @return array arguments of the single `code` call (body, language, filename, options)
     */
    private function parseSingleCode(string $input): array
    {
        $codeCalls = $this->parseCodeCalls($input);
        $this->assertCount(1, $codeCalls);

        return $codeCalls[0][1];
    }

    /**
     * Parse the given text and assert that no `code` call was emitted at all.
     *
     * @param string $input wiki text handed to the parser
     * @param string $message failure message shown when a `code` call is found
     */
    private function assertNoCode(string $input, string $message): void
    {
        $this->addModes();
        $this->P->parse($input);

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains('code', $modes, $message);
    }

    public function testBasicBacktickFence(): void
    {
        $args = $this->parseSingleCode("```\nhello\n```");

        $this->assertSame("hello\n", $args[0]);
        $this->assertNull($args[1]);
        $this->assertNull($args[2]);
    }

    public function testLanguageFromInfoString(): void
    {
        $args = $this->parseSingleCode("```ruby\nx\n```");

        $this->assertSame("x\n", $args[0]);
        $this->assertSame('ruby', $args[1]);
    }

    public function testLanguageIsFirstWord(): void
    {
        // GFM spec example 113: only the first token of the info string
        // is treated as a language; extra junk is dropped.
        $args = $this->parseSingleCode("```ruby startline=3 \$%@#\$\nx\n```");

        $this->assertSame('ruby', $args[1]);
    }

    public function testBacktickInfoRejectsBackticks(): void
    {
        // GFM spec example 115: a backtick run with backticks in its
        // info string is NOT a fence — stays for inline code parsing.
        $this->assertNoCode(
            "``` aa ```\nfoo",
            'Backtick fence must reject backticks in info string',
        );
    }

    public function testLongerCloseFenceIsValid(): void
    {
        // Opener 3, closer 5 — valid because closer is ≥ opener.
        $args = $this->parseSingleCode("```\naaa\n`````");

        $this->assertSame("aaa\n", $args[0]);
    }

    public function testIndentedFenceIsNotFence(): void
    {
        // Column-0-only policy: any leading space rejects the fence.
        $this->assertNoCode(
            " ```\nx\n ```",
            'Fence must start at column 0; indent is out of scope',
        );
    }

    public function testUnclosedFenceStaysLiteral(): void
    {
        // An unclosed fence must not emit a code call — the ``` stays as
        // paragraph text. Diverges from strict GFM (which would consume
        // to EOF). The rationale is not recorded in this test class;
        // presumably it lives in the GfmCode class docblock (TODO confirm).
        $this->assertNoCode(
            "```\nabc\ndef",
            'Unclosed fences must stay literal, not emit code',
        );
    }

    public function testEmptyBody(): void
    {
        $args = $this->parseSingleCode("```\n```");

        $this->assertSame('', $args[0]);
    }

    public function testCloseWithTrailingSpaces(): void
    {
        $args = $this->parseSingleCode("```\nx\n```   ");

        $this->assertSame("x\n", $args[0]);
    }

    public function testCloseWithTrailingTabs(): void
    {
        $args = $this->parseSingleCode("```\nx\n```\t\t");

        $this->assertSame("x\n", $args[0]);
    }

    public function testFenceInterruptsParagraph(): void
    {
        // GFM spec example 110: a fence doesn't need a blank line before
        // it; the `code` instruction is block-level and paragraphs break.
        $args = $this->parseSingleCode("foo\n```\nbar\n```\nbaz");

        $this->assertSame("bar\n", $args[0]);
    }

    public function testEmptyInfoStringMeansNullLanguage(): void
    {
        $args = $this->parseSingleCode("```\nx\n```");

        $this->assertNull($args[1]);
    }

    public function testInfoStringSpecialChar(): void
    {
        // GFM spec example 114: a semicolon is a valid language token.
        $args = $this->parseSingleCode("```;\n```");

        $this->assertSame(';', $args[1]);
    }

    public function testTildeLineDoesNotCloseBacktickFence(): void
    {
        // The `~~~` line must end up inside the body instead of ending it.
        $args = $this->parseSingleCode("```\naaa\n~~~\nbbb\n```");

        $this->assertSame("aaa\n~~~\nbbb\n", $args[0]);
    }

    public function testFilenameAfterLanguage(): void
    {
        // DokuWiki's Code mode treats the second whitespace token as
        // the filename (turns the block into a download link). GfmCode
        // accepts the same vocabulary on the info string.
        $args = $this->parseSingleCode("```php myfile.php\n<?php\n```");

        $this->assertSame('php', $args[1]);
        $this->assertSame('myfile.php', $args[2]);
    }

    public function testHtmlAliasedToHtml4Strict(): void
    {
        // Same GeSHi alias DokuWiki's Code mode applies.
        $args = $this->parseSingleCode("```html\n<p>\n```");

        $this->assertSame('html4strict', $args[1]);
    }

    public function testDashMeansNoLanguage(): void
    {
        // DokuWiki uses `-` as an explicit "no language" marker; lets
        // a filename follow without a language argument first.
        $args = $this->parseSingleCode("```- somefile.txt\nx\n```");

        $this->assertNull($args[1]);
        $this->assertSame('somefile.txt', $args[2]);
    }

    public function testHighlightOptions(): void
    {
        // DokuWiki uses space-separated keys inside `[...]`. A bare key is
        // expected to come back as true and the quoted "10" as the integer
        // 10. Comma-separated values (GeSHi line lists) are not exercised
        // by this test.
        $args = $this->parseSingleCode(
            "```php [enable_line_numbers start_line_numbers_at=\"10\"]\nx\n```"
        );

        $this->assertSame('php', $args[1]);
        $this->assertNull($args[2]);
        // With options present the call carries exactly four arguments.
        $this->assertCount(4, $args);
        $this->assertSame(
            ['enable_line_numbers' => true, 'start_line_numbers_at' => 10],
            $args[3]
        );
    }

    public function testFilenameAndOptions(): void
    {
        $args = $this->parseSingleCode("```php myfile.php [enable_line_numbers]\nx\n```");

        $this->assertSame('php', $args[1]);
        $this->assertSame('myfile.php', $args[2]);
        $this->assertSame(['enable_line_numbers' => true], $args[3]);
    }

    public function testInfoStringBackslashEscapeIsResolved(): void
    {
        // GFM §6.1 (spec example 322): backslash-escaped punctuation in
        // the info string is unescaped before parseAttributes splits it
        // into language / filename / options.
        $args = $this->parseSingleCode("```c\\#\nx\n```");

        $this->assertSame('c#', $args[1]);
    }

    public function testCodeBodyKeepsBackslashEscapes(): void
    {
        // The body of a fenced code block is captured verbatim — escapes
        // inside it must NOT collapse (spec: escapes don't fire in code
        // blocks). Only the info string is touched by Escape::unescape.
        $args = $this->parseSingleCode("```\nfoo \\* bar\n```");

        $this->assertSame("foo \\* bar\n", $args[0]);
    }

    public function testSortValue(): void
    {
        // Pins the sort value of the mode; no parsing involved.
        $this->assertSame(200, (new GfmCode())->getSort());
    }
}
327