<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
use dokuwiki\Parsing\ParserMode\GfmEmphasis;
use dokuwiki\Parsing\ParserMode\GfmEscape;
use dokuwiki\Parsing\ParserMode\GfmHeader;
use dokuwiki\Parsing\ParserMode\Linebreak;

/**
 * Tests for the GFM backslash-escape mode.
 *
 * Every test registers GfmEscape (plus whichever competing mode it wants to
 * exercise) on the parser, parses a short fragment, and then inspects the
 * calls recorded on the handler.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Default every test to the pure `md` syntax; the DW-loaded tests switch
     * the syntax themselves before adding modes.
     */
    public function setUp(): void
    {
        parent::setUp();
        $this->setSyntax('md');
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     *
     * @param string $char the single punctuation character placed after the backslash
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->joinedCdata(),
            "Escaped {$char} must collapse to the literal char in cdata stream"
        );
    }

    /**
     * One dataset per ASCII punctuation character, keyed by `char_<hex>` so
     * that a failing dataset is identifiable even for awkward characters.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $datasets = [];
        foreach (str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~') as $char) {
            $datasets['char_' . bin2hex($char)] = [$char];
        }

        return $datasets;
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     *
     * @param string $tail the character placed directly after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->joinedCdata());
    }

    /**
     * Characters that must NOT be treated as escapable.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit' => ['3'],
            'multibyte' => ['α'],
            'space' => [' '],
            'tab' => ["\t"],
        ];
    }

    /**
     * `\\` is the escaped-backslash form. The first char in the match is
     * consumed as the escape introducer; the second is emitted as a literal
     * backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->joinedCdata());
    }

    /**
     * GFM spec example 310 fragment. `\*` must consume the asterisk before
     * GfmEmphasis can use it as an opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $this->assertNotContains(
            'emphasis_open',
            $this->emittedCallNames(),
            'Escaped opener must not start emphasis'
        );
    }

    /**
     * GFM spec example 311. `\\` collapses to a literal backslash, and the
     * *emphasis* that follows is now seen by GfmEmphasis with its full text
     * intact.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $this->assertContains(
            'emphasis_open',
            $this->emittedCallNames(),
            'After \\\\ collapses, the surviving *emphasis* must open emphasis'
        );
    }

    /**
     * `\#` must defeat GfmHeader's column-0 `#` match. The trailing text
     * becomes a normal paragraph instead.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $this->assertNotContains(
            'header',
            $this->emittedCallNames(),
            'Escaped # must not produce a header'
        );
    }

    /**
     * GFM spec example 313. The whole `\[\`` is captured by
     * GfmBacktickSingle in one regex shot, so GfmEscape never runs on its
     * body. The body must retain the literal backslashes.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        $this->assertContains(
            '\\[\\',
            $this->firstArgsOf('unformatted'),
            'Backtick span body must preserve the literal backslashes'
        );
    }

    /**
     * Pins the mode's sort value, which the DW-loaded tests below rely on
     * when they describe GfmEscape running ahead of Linebreak.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();

        $this->assertSame(5, $mode->getSort());
    }

    /**
     * In pure `md` mode, `\\` before a newline still escapes to a literal
     * backslash per GFM §6.1 — no DW Linebreak is loaded to defer to.
     */
    public function testDoubleBackslashBeforeNewlineEscapesInPureMd(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse("foo \\\\\nbar");

        $this->assertNotContains(
            'linebreak',
            $this->emittedCallNames(),
            'No DW Linebreak is loaded in pure md mode — `\\\\\\n` must stay an escape'
        );

        $this->assertSame(
            "\nfoo \\\nbar",
            $this->joinedCdata(),
            '`\\\\` collapses to a literal backslash; the newline survives as cdata'
        );
    }

    /**
     * In any DW-loaded mode (`dw+md` / `md+dw`), `\\` before a space, tab,
     * or newline must defer to DW's Linebreak mode. GfmEscape would
     * otherwise consume those two bytes first (sort 5 vs Linebreak's 140)
     * and the forced linebreak would never fire.
     *
     * @dataProvider provideDwLoadedSyntaxes
     *
     * @param string $syntax syntax identifier handed to setSyntax()
     */
    public function testDoubleBackslashBeforeNewlineDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        $this->setSyntax($syntax);

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse("foo\\\\\nbar");

        $this->assertContains(
            'linebreak',
            $this->emittedCallNames(),
            "Under $syntax, `\\\\\\\\\\n` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * Same deferral applies for `\\` before a literal space — the
     * canonical DW forced-linebreak form.
     *
     * @dataProvider provideDwLoadedSyntaxes
     *
     * @param string $syntax syntax identifier handed to setSyntax()
     */
    public function testDoubleBackslashBeforeSpaceDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        $this->setSyntax($syntax);

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('foo \\\\ bar');

        $this->assertContains(
            'linebreak',
            $this->emittedCallNames(),
            "Under $syntax, `\\\\\\\\ ` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * The deferral is narrow: `\\` followed by non-whitespace still
     * escapes to a literal backslash, even with DW Linebreak loaded.
     * UNC-style paths like `\\\\host\\share` would otherwise become a
     * surprise of literal double-backslashes for a user who typed two
     * GFM-escapes back-to-back.
     *
     * @dataProvider provideDwLoadedSyntaxes
     *
     * @param string $syntax syntax identifier handed to setSyntax()
     */
    public function testMidLineDoubleBackslashStillEscapesWhenDwLoaded(string $syntax): void
    {
        $this->setSyntax($syntax);

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('\\\\\\\\host\\\\share');

        $this->assertNotContains(
            'linebreak',
            $this->emittedCallNames(),
            'Mid-line `\\\\` (no EOL whitespace) must not fire a linebreak'
        );

        $this->assertSame(
            "\n\\\\host\\share",
            $this->joinedCdata(),
            'Each `\\\\` collapses to a single literal backslash, GFM-style'
        );
    }

    /**
     * Syntax identifiers in which DW modes are loaded alongside the md ones.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideDwLoadedSyntaxes(): array
    {
        return [
            'dw_md' => ['dw+md'],
            'md_dw' => ['md+dw'],
        ];
    }

    /**
     * Collect the first argument of every recorded handler call with the
     * given name, in the order the calls were recorded.
     *
     * Each entry of `$this->H->calls` is read as `[name, [firstArg, ...]]`.
     *
     * @param string $instruction call name to select, e.g. `cdata`
     * @return array first arguments of the matching calls
     */
    private function firstArgsOf(string $instruction): array
    {
        $args = [];
        foreach ($this->H->calls as $call) {
            if ($call[0] === $instruction) {
                $args[] = $call[1][0];
            }
        }

        return $args;
    }

    /**
     * Concatenate the text of all recorded `cdata` calls into one string so
     * a test can assert on the literal output regardless of how the lexer
     * chunked it.
     */
    private function joinedCdata(): string
    {
        return implode('', $this->firstArgsOf('cdata'));
    }

    /**
     * Names of all recorded handler calls, in recording order.
     *
     * @return array
     */
    private function emittedCallNames(): array
    {
        return array_column($this->H->calls, 0);
    }
}