<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ModeRegistry;
use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
use dokuwiki\Parsing\ParserMode\GfmEmphasis;
use dokuwiki\Parsing\ParserMode\GfmEscape;
use dokuwiki\Parsing\ParserMode\GfmHeader;
use dokuwiki\Parsing\ParserMode\Linebreak;

/**
 * Tests for the GFM backslash-escape mode.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Selects the pure `md` syntax and calls ModeRegistry::reset() before
     * every test. Individual tests may override `$conf['syntax']` afterwards.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $conf['syntax'] = 'md';
        ModeRegistry::reset();
    }

    /**
     * Calls ModeRegistry::reset() again before handing over to the parent
     * teardown.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Concatenate the text of every `cdata` call recorded by the handler.
     *
     * Each entry of `$this->H->calls` is read as `[name, args, ...]`; for
     * entries named `cdata` the first argument is appended, in call order.
     *
     * @return string the joined cdata text
     */
    private function joinedCdataText(): string
    {
        $text = '';
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'cdata') {
                $text .= $call[1][0];
            }
        }
        return $text;
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->joinedCdataText(),
            "Escaped {$char} must collapse to the literal char in cdata stream"
        );
    }

    /**
     * One dataset per ASCII punctuation character, keyed `char_<hex>` so a
     * failing dataset can be identified without printing the raw character.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->joinedCdataText());
    }

    /**
     * Characters that must NOT be treated as escapable after a backslash.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit' => ['3'],
            'multibyte' => ['α'],
            'space' => [' '],
            'tab' => ["\t"],
        ];
    }

    /**
     * `\\` is the escaped-backslash form: the first backslash is consumed as
     * the escape introducer and the second is emitted as a literal backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->joinedCdataText());
    }

    /**
     * GFM spec example 310 fragment. `\*` must consume the asterisk before
     * GfmEmphasis can use it as an opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'emphasis_open',
            $modes,
            'Escaped opener must not start emphasis'
        );
    }

    /**
     * GFM spec example 311. `\\` collapses to a literal backslash, and the
     * `*emphasis*` that follows is seen by GfmEmphasis with its full text
     * intact.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $modes = array_column($this->H->calls, 0);
        $this->assertContains(
            'emphasis_open',
            $modes,
            'After \\\\ collapses, the surviving *emphasis* must open emphasis'
        );
    }

    /**
     * `\#` must defeat GfmHeader's column-0 `#` match. The trailing text
     * becomes a normal paragraph instead.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'header',
            $modes,
            'Escaped # must not produce a header'
        );
    }

    /**
     * GFM spec example 313. The whole backtick span is captured by
     * GfmBacktickSingle in one regex shot, so GfmEscape never runs on its
     * body. The body must retain the literal backslashes.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        $unformatted = array_filter($this->H->calls, static fn($c) => $c[0] === 'unformatted');
        $bodies = array_map(static fn($c) => $c[1][0], $unformatted);
        $this->assertContains(
            '\\[\\',
            $bodies,
            'Backtick span body must preserve the literal backslashes'
        );
    }

    /**
     * Pins the mode's sort value; the linebreak-deferral tests in this class
     * rely on GfmEscape sorting ahead of Linebreak.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }

    /**
     * In pure `md` mode, `\\` before a newline still escapes to a literal
     * backslash per GFM §6.1 — no DW Linebreak is loaded to defer to.
     */
    public function testDoubleBackslashBeforeNewlineEscapesInPureMd(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse("foo \\\\\nbar");

        $names = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'linebreak',
            $names,
            'No DW Linebreak is loaded in pure md mode — `\\\\\\n` must stay an escape'
        );

        $this->assertSame(
            "\nfoo \\\nbar",
            $this->joinedCdataText(),
            '`\\\\` collapses to a literal backslash; the newline survives as cdata'
        );
    }

    /**
     * In any DW-loaded mode (`dw+md` / `md+dw`), `\\` before a space, tab,
     * or newline must defer to DW's Linebreak mode. GfmEscape would
     * otherwise consume those two bytes first (sort 5 vs Linebreak's 140)
     * and the forced linebreak would never fire.
     *
     * @dataProvider provideDwLoadedSyntaxes
     */
    public function testDoubleBackslashBeforeNewlineDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        global $conf;
        $conf['syntax'] = $syntax;

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse("foo\\\\\nbar");

        $names = array_column($this->H->calls, 0);
        // Double-quoted string: six backslashes + n render as `\\\n`,
        // i.e. the two-backslash + newline sequence that was parsed.
        $this->assertContains(
            'linebreak',
            $names,
            "Under {$syntax}, `\\\\\\n` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * Same deferral applies for `\\` before a literal space — the
     * canonical DW forced-linebreak form.
     *
     * @dataProvider provideDwLoadedSyntaxes
     */
    public function testDoubleBackslashBeforeSpaceDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        global $conf;
        $conf['syntax'] = $syntax;

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('foo \\\\ bar');

        $names = array_column($this->H->calls, 0);
        // Double-quoted string: four backslashes render as `\\`, matching
        // the two-backslash + space sequence that was parsed.
        $this->assertContains(
            'linebreak',
            $names,
            "Under {$syntax}, `\\\\ ` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * The deferral is narrow: `\\` followed by non-whitespace still
     * escapes to a literal backslash, even with DW Linebreak loaded.
     * UNC-style paths like `\\\\host\\share` would otherwise come out with
     * surprising literal double-backslashes for a user who typed two
     * GFM escapes back-to-back.
     *
     * @dataProvider provideDwLoadedSyntaxes
     */
    public function testMidLineDoubleBackslashStillEscapesWhenDwLoaded(string $syntax): void
    {
        global $conf;
        $conf['syntax'] = $syntax;

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('\\\\\\\\host\\\\share');

        $names = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'linebreak',
            $names,
            'Mid-line `\\\\` (no EOL whitespace) must not fire a linebreak'
        );

        $this->assertSame(
            "\n\\\\host\\share",
            $this->joinedCdataText(),
            'Each `\\\\` collapses to a single literal backslash, GFM-style'
        );
    }

    /**
     * Syntax settings under which the DW Linebreak mode is loaded alongside
     * the GFM modes.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideDwLoadedSyntaxes(): array
    {
        return [
            'dw_md' => ['dw+md'],
            'md_dw' => ['md+dw'],
        ];
    }
}