<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ParserMode\Eol;
use dokuwiki\Parsing\ParserMode\GfmCode;

/**
 * Tests for GFM backtick-fenced code blocks (`GfmCode`).
 *
 * Each test feeds a markdown snippet to the parser and inspects the `code`
 * instructions recorded by the handler. As exercised by the assertions
 * below, the argument list of a `code` instruction is laid out as:
 *
 *   [0] body text, [1] language (or null), [2] filename (or null),
 *   [3] highlight options (only checked when the info string carries `[...]`)
 */
class GfmCodeTest extends ParserTestBase
{
    public function setUp(): void
    {
        parent::setUp();
        $this->setSyntax('md');
    }

    /**
     * Register the mode plus Eol. Order matters: the ParallelRegex
     * alternates patterns in insertion order and leftmost-match picks the
     * first alternative, so the block mode must be added before Eol
     * (same effect ModeRegistry achieves in production via sort values).
     */
    private function addModes(): void
    {
        $this->P->addMode('gfm_code', new GfmCode());
        $this->P->addMode('eol', new Eol());
    }

    /**
     * Register the modes, parse the input and return all recorded handler calls.
     *
     * @param string $input markdown source to parse
     * @return array the handler's call list; index 0 of each call is the instruction name
     */
    private function parseCalls(string $input): array
    {
        $this->addModes();
        $this->P->parse($input);
        return $this->H->calls;
    }

    /**
     * Parse the input, assert that exactly one `code` instruction was emitted
     * and return that instruction's argument list.
     *
     * @param string $input markdown source to parse
     * @return array arguments of the single `code` call (index 1 of the call)
     */
    private function parseSingleCode(string $input): array
    {
        // array_filter preserves keys, so reindex before addressing element 0
        $codeCalls = array_values(array_filter(
            $this->parseCalls($input),
            static fn($c) => $c[0] === 'code'
        ));
        $this->assertCount(1, $codeCalls);
        return $codeCalls[0][1];
    }

    /**
     * Parse the input and assert that no `code` instruction was emitted.
     *
     * @param string $input markdown source to parse
     * @param string $message failure message passed to the assertion
     */
    private function assertNoCodeCall(string $input, string $message): void
    {
        $modes = array_column($this->parseCalls($input), 0);
        $this->assertNotContains('code', $modes, $message);
    }

    public function testBasicBacktickFence(): void
    {
        $args = $this->parseSingleCode("```\nhello\n```");
        $this->assertSame("hello\n", $args[0]);
        $this->assertNull($args[1]);
        $this->assertNull($args[2]);
    }

    public function testLanguageFromInfoString(): void
    {
        $args = $this->parseSingleCode("```ruby\nx\n```");
        $this->assertSame("x\n", $args[0]);
        $this->assertSame('ruby', $args[1]);
    }

    public function testLanguageIsFirstWord(): void
    {
        // GFM spec example 113: only the first token of the info string
        // is treated as a language; extra junk is dropped.
        $args = $this->parseSingleCode("```ruby startline=3 \$%@#\$\nx\n```");
        $this->assertSame('ruby', $args[1]);
    }

    public function testBacktickInfoRejectsBackticks(): void
    {
        // GFM spec example 115: a backtick run with backticks in its
        // info string is NOT a fence — stays for inline code parsing.
        $this->assertNoCodeCall(
            "``` aa ```\nfoo",
            'Backtick fence must reject backticks in info string'
        );
    }

    public function testLongerCloseFenceIsValid(): void
    {
        // Opener 3, closer 5 — valid because closer is ≥ opener.
        $args = $this->parseSingleCode("```\naaa\n`````");
        $this->assertSame("aaa\n", $args[0]);
    }

    public function testIndentedFenceIsNotFence(): void
    {
        // Column-0-only policy: any leading space rejects the fence.
        $this->assertNoCodeCall(
            " ```\nx\n ```",
            'Fence must start at column 0; indent is out of scope'
        );
    }

    public function testUnclosedFenceStaysLiteral(): void
    {
        // An unclosed fence must not emit a code call — the ``` stays as
        // paragraph text. Diverges from strict GFM (which would consume
        // to EOF); see class docblock for the rationale.
        $this->assertNoCodeCall(
            "```\nabc\ndef",
            'Unclosed fences must stay literal, not emit code'
        );
    }

    public function testEmptyBody(): void
    {
        $args = $this->parseSingleCode("```\n```");
        $this->assertSame('', $args[0]);
    }

    public function testCloseWithTrailingSpaces(): void
    {
        $args = $this->parseSingleCode("```\nx\n``` ");
        $this->assertSame("x\n", $args[0]);
    }

    public function testCloseWithTrailingTabs(): void
    {
        $args = $this->parseSingleCode("```\nx\n```\t\t");
        $this->assertSame("x\n", $args[0]);
    }

    public function testFenceInterruptsParagraph(): void
    {
        // GFM spec example 110: a fence doesn't need a blank line before
        // it; the `code` instruction is block-level and paragraphs break.
        $args = $this->parseSingleCode("foo\n```\nbar\n```\nbaz");
        $this->assertSame("bar\n", $args[0]);
    }

    public function testEmptyInfoStringMeansNullLanguage(): void
    {
        $args = $this->parseSingleCode("```\nx\n```");
        $this->assertNull($args[1]);
    }

    public function testInfoStringSpecialChar(): void
    {
        // GFM spec example 114: a semicolon is a valid language token.
        $args = $this->parseSingleCode("```;\n```");
        $this->assertSame(';', $args[1]);
    }

    public function testTildeLineDoesNotCloseBacktickFence(): void
    {
        $args = $this->parseSingleCode("```\naaa\n~~~\nbbb\n```");
        $this->assertSame("aaa\n~~~\nbbb\n", $args[0]);
    }

    public function testFilenameAfterLanguage(): void
    {
        // DokuWiki's Code mode treats the second whitespace token as
        // the filename (turns the block into a download link). GfmCode
        // accepts the same vocabulary on the info string.
        $args = $this->parseSingleCode("```php myfile.php\n<?php\n```");
        $this->assertSame('php', $args[1]);
        $this->assertSame('myfile.php', $args[2]);
    }

    public function testHtmlAliasedToHtml4Strict(): void
    {
        // Same GeSHi alias DokuWiki's Code mode applies.
        $args = $this->parseSingleCode("```html\n<p>\n```");
        $this->assertSame('html4strict', $args[1]);
    }

    public function testDashMeansNoLanguage(): void
    {
        // DokuWiki uses `-` as an explicit "no language" marker; lets
        // a filename follow without a language argument first.
        $args = $this->parseSingleCode("```- somefile.txt\nx\n```");
        $this->assertNull($args[1]);
        $this->assertSame('somefile.txt', $args[2]);
    }

    public function testHighlightOptions(): void
    {
        // DokuWiki uses space-separated keys inside `[...]`; comma
        // separators inside a value survive (as GeSHi line lists).
        $args = $this->parseSingleCode(
            "```php [enable_line_numbers start_line_numbers_at=\"10\"]\nx\n```"
        );
        $this->assertSame('php', $args[1]);
        $this->assertNull($args[2]);
        $this->assertCount(4, $args);
        $this->assertSame(
            ['enable_line_numbers' => true, 'start_line_numbers_at' => 10],
            $args[3]
        );
    }

    public function testFilenameAndOptions(): void
    {
        $args = $this->parseSingleCode("```php myfile.php [enable_line_numbers]\nx\n```");
        $this->assertSame('php', $args[1]);
        $this->assertSame('myfile.php', $args[2]);
        $this->assertSame(['enable_line_numbers' => true], $args[3]);
    }

    public function testInfoStringBackslashEscapeIsResolved(): void
    {
        // GFM §6.1 (spec example 322): backslash-escaped punctuation in
        // the info string is unescaped before parseAttributes splits it
        // into language / filename / options.
        $args = $this->parseSingleCode("```c\\#\nx\n```");
        $this->assertSame('c#', $args[1]);
    }

    public function testCodeBodyKeepsBackslashEscapes(): void
    {
        // The body of a fenced code block is captured verbatim — escapes
        // inside it must NOT collapse (spec: escapes don't fire in code
        // blocks). Only the info string is touched by Escape::unescape.
        $args = $this->parseSingleCode("```\nfoo \\* bar\n```");
        $this->assertSame("foo \\* bar\n", $args[0]);
    }

    public function testSortValue(): void
    {
        $this->assertSame(200, (new GfmCode())->getSort());
    }
}