<?php

// xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmCodeTest.php (revision 47a02a102092be9e1e6f1ddaf158bdfffdb13d4f)

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ParserMode\Eol;
use dokuwiki\Parsing\ParserMode\GfmCode;

/**
 * Tests for GFM backtick-fenced code blocks (`GfmCode`).
 *
 * Each test feeds a short Markdown snippet to the parser fixture and inspects
 * the handler calls recorded in `$this->H->calls`. A call is read as
 * `[instruction name, argument list, ...]`; for `code` calls the tests below
 * rely on the argument list being `[body, language, filename, options]`.
 */
class GfmCodeTest extends ParserTestBase
{
    /**
     * Build the inherited fixture, then switch it to the `md` syntax.
     */
    public function setUp(): void
    {
        parent::setUp();
        // setSyntax() comes from the test base; presumably it selects the
        // Markdown flavour for the parser under test (confirm in ParserTestBase).
        $this->setSyntax('md');
    }

    /**
     * Register the mode plus Eol. Order matters: the ParallelRegex
     * alternates patterns in insertion order and leftmost-match picks the
     * first alternative, so the block mode must be added before Eol
     * (same effect ModeRegistry achieves in production via sort values).
     */
    private function addModes(): void
    {
        $this->P->addMode('gfm_code', new GfmCode());
        $this->P->addMode('eol', new Eol());
    }

    /**
     * Parse $input and return the arguments of the one `code` call it produced.
     *
     * Registers the modes, runs the parser, keeps only the `code` handler
     * calls and asserts that there is exactly one of them.
     *
     * @param string $input Markdown source handed to the parser
     * @return array argument list of the single `code` call; as used by the
     *               tests: 0 = body, 1 = language, 2 = filename, 3 = options
     */
    private function parseSingleCodeCall(string $input): array
    {
        $this->addModes();
        $this->P->parse($input);

        // array_filter() preserves keys, so reindex to address the call as [0].
        $codeCalls = array_values(array_filter(
            $this->H->calls,
            static fn($call) => $call[0] === 'code'
        ));
        $this->assertCount(1, $codeCalls);

        return $codeCalls[0][1];
    }

    /**
     * Parse $input and return the instruction name of every recorded call.
     *
     * @param string $input Markdown source handed to the parser
     * @return array first element of each entry in the handler's call list
     */
    private function parseInstructionNames(string $input): array
    {
        $this->addModes();
        $this->P->parse($input);

        return array_column($this->H->calls, 0);
    }

    /**
     * A plain fence yields one code call with neither language nor filename.
     */
    public function testBasicBacktickFence(): void
    {
        $args = $this->parseSingleCodeCall("```\nhello\n```");
        $this->assertSame("hello\n", $args[0]);
        $this->assertNull($args[1]);
        $this->assertNull($args[2]);
    }

    /**
     * The word directly after the opening fence becomes the language.
     */
    public function testLanguageFromInfoString(): void
    {
        $args = $this->parseSingleCodeCall("```ruby\nx\n```");
        $this->assertSame("x\n", $args[0]);
        $this->assertSame('ruby', $args[1]);
    }

    public function testLanguageIsFirstWord(): void
    {
        // GFM spec example 113: only the first token of the info string
        // is treated as a language; extra junk is dropped.
        $args = $this->parseSingleCodeCall("```ruby startline=3 \$%@#\$\nx\n```");
        $this->assertSame('ruby', $args[1]);
    }

    public function testBacktickInfoRejectsBackticks(): void
    {
        // GFM spec example 115: a backtick run with backticks in its
        // info string is NOT a fence — stays for inline code parsing.
        $modes = $this->parseInstructionNames("``` aa ```\nfoo");
        $this->assertNotContains('code', $modes,
            'Backtick fence must reject backticks in info string');
    }

    public function testLongerCloseFenceIsValid(): void
    {
        // Opener 3, closer 5 — valid because closer is ≥ opener.
        $args = $this->parseSingleCodeCall("```\naaa\n`````");
        $this->assertSame("aaa\n", $args[0]);
    }

    public function testIndentedFenceIsNotFence(): void
    {
        // Column-0-only policy: any leading space rejects the fence.
        $modes = $this->parseInstructionNames(" ```\nx\n ```");
        $this->assertNotContains('code', $modes,
            'Fence must start at column 0; indent is out of scope');
    }

    public function testUnclosedFenceStaysLiteral(): void
    {
        // An unclosed fence must not emit a code call — the ``` stays as
        // paragraph text. Diverges from strict GFM (which would consume
        // to EOF); see class docblock for the rationale.
        $modes = $this->parseInstructionNames("```\nabc\ndef");
        $this->assertNotContains('code', $modes,
            'Unclosed fences must stay literal, not emit code');
    }

    /**
     * A fence whose closer directly follows the opener has an empty body.
     */
    public function testEmptyBody(): void
    {
        $args = $this->parseSingleCodeCall("```\n```");
        $this->assertSame('', $args[0]);
    }

    /**
     * Spaces after the closing fence do not stop it from closing the block.
     */
    public function testCloseWithTrailingSpaces(): void
    {
        $args = $this->parseSingleCodeCall("```\nx\n```   ");
        $this->assertSame("x\n", $args[0]);
    }

    /**
     * Tabs after the closing fence do not stop it from closing the block.
     */
    public function testCloseWithTrailingTabs(): void
    {
        $args = $this->parseSingleCodeCall("```\nx\n```\t\t");
        $this->assertSame("x\n", $args[0]);
    }

    public function testFenceInterruptsParagraph(): void
    {
        // GFM spec example 110: a fence doesn't need a blank line before
        // it; the `code` instruction is block-level and paragraphs break.
        $args = $this->parseSingleCodeCall("foo\n```\nbar\n```\nbaz");
        $this->assertSame("bar\n", $args[0]);
    }

    /**
     * Without an info string the language argument is null.
     */
    public function testEmptyInfoStringMeansNullLanguage(): void
    {
        $args = $this->parseSingleCodeCall("```\nx\n```");
        $this->assertNull($args[1]);
    }

    public function testInfoStringSpecialChar(): void
    {
        // GFM spec example 114: a semicolon is a valid language token.
        $args = $this->parseSingleCodeCall("```;\n```");
        $this->assertSame(';', $args[1]);
    }

    /**
     * A `~~~` line inside a backtick fence is body text, not a closer.
     */
    public function testTildeLineDoesNotCloseBacktickFence(): void
    {
        $args = $this->parseSingleCodeCall("```\naaa\n~~~\nbbb\n```");
        $this->assertSame("aaa\n~~~\nbbb\n", $args[0]);
    }

    public function testFilenameAfterLanguage(): void
    {
        // DokuWiki's Code mode treats the second whitespace token as
        // the filename (turns the block into a download link). GfmCode
        // accepts the same vocabulary on the info string.
        $args = $this->parseSingleCodeCall("```php myfile.php\n<?php\n```");
        $this->assertSame('php', $args[1]);
        $this->assertSame('myfile.php', $args[2]);
    }

    public function testHtmlAliasedToHtml4Strict(): void
    {
        // Same GeSHi alias DokuWiki's Code mode applies.
        $args = $this->parseSingleCodeCall("```html\n<p>\n```");
        $this->assertSame('html4strict', $args[1]);
    }

    public function testDashMeansNoLanguage(): void
    {
        // DokuWiki uses `-` as an explicit "no language" marker; lets
        // a filename follow without a language argument first.
        $args = $this->parseSingleCodeCall("```- somefile.txt\nx\n```");
        $this->assertNull($args[1]);
        $this->assertSame('somefile.txt', $args[2]);
    }

    public function testHighlightOptions(): void
    {
        // DokuWiki uses space-separated keys inside `[...]`: a bare key
        // becomes true and a quoted numeric value arrives as an integer.
        // Comma-separated values (GeSHi line lists) are not exercised here.
        $args = $this->parseSingleCodeCall(
            "```php [enable_line_numbers start_line_numbers_at=\"10\"]\nx\n```"
        );
        $this->assertSame('php', $args[1]);
        $this->assertNull($args[2]);
        $this->assertCount(4, $args);
        $this->assertSame(
            ['enable_line_numbers' => true, 'start_line_numbers_at' => 10],
            $args[3]
        );
    }

    /**
     * Language, filename and bracketed options can all appear together.
     */
    public function testFilenameAndOptions(): void
    {
        $args = $this->parseSingleCodeCall("```php myfile.php [enable_line_numbers]\nx\n```");
        $this->assertSame('php', $args[1]);
        $this->assertSame('myfile.php', $args[2]);
        $this->assertSame(['enable_line_numbers' => true], $args[3]);
    }

    public function testInfoStringBackslashEscapeIsResolved(): void
    {
        // GFM §6.1 (spec example 322): backslash-escaped punctuation in
        // the info string is unescaped before parseAttributes splits it
        // into language / filename / options.
        $args = $this->parseSingleCodeCall("```c\\#\nx\n```");
        $this->assertSame('c#', $args[1]);
    }

    public function testCodeBodyKeepsBackslashEscapes(): void
    {
        // The body of a fenced code block is captured verbatim — escapes
        // inside it must NOT collapse (spec: escapes don't fire in code
        // blocks). Only the info string is touched by Escape::unescape.
        $args = $this->parseSingleCodeCall("```\nfoo \\* bar\n```");
        $this->assertSame("foo \\* bar\n", $args[0]);
    }

    /**
     * Pins the mode's sort value, which addModes() notes drives mode
     * ordering in production.
     */
    public function testSortValue(): void
    {
        $this->assertSame(200, (new GfmCode())->getSort());
    }
}