<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ModeRegistry;
use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
use dokuwiki\Parsing\ParserMode\GfmEmphasis;
use dokuwiki\Parsing\ParserMode\GfmEscape;
use dokuwiki\Parsing\ParserMode\GfmHeader;
use dokuwiki\Parsing\ParserMode\Linebreak;

/**
 * Tests for the GFM backslash-escape mode.
 *
 * Each test registers GfmEscape (plus whichever competing mode it is being
 * checked against) on the parser, parses a short snippet and inspects the
 * calls recorded by the handler.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Run every test in pure `md` syntax unless the test overrides it, and
     * start from a freshly reset mode registry.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $conf['syntax'] = 'md';
        ModeRegistry::reset();
    }

    /**
     * Reset the mode registry again so state from this test does not carry over.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Concatenate the first argument of every `cdata` call recorded by the
     * handler, in call order.
     *
     * @return string the joined cdata text
     */
    private function joinCdataCalls(): string
    {
        $cdata = array_filter($this->H->calls, static fn($c) => $c[0] === 'cdata');
        return implode('', array_map(static fn($c) => $c[1][0], $cdata));
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     * @param string $char the punctuation character placed after the backslash
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->joinCdataCalls(),
            "Escaped {$char} must collapse to the literal char in cdata stream"
        );
    }

    /**
     * One dataset per ASCII punctuation character, keyed `char_<hex>` so a
     * failing dataset is identifiable even for characters that are awkward
     * to print.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     * @param string $tail the character placed after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->joinCdataCalls());
    }

    /**
     * Characters that must NOT be treated as escapable.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit' => ['3'],
            'multibyte' => ['α'],
            'space' => [' '],
            'tab' => ["\t"],
        ];
    }

    /**
     * `\\` is the escaped-backslash form: the first backslash is consumed as
     * the escape introducer, the second is emitted as a literal backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->joinCdataCalls());
    }

    /**
     * GFM spec example 310 fragment. `\*` must consume the asterisk before
     * GfmEmphasis can use it as an opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'emphasis_open',
            $modes,
            'Escaped opener must not start emphasis'
        );
    }

    /**
     * GFM spec example 311. `\\` collapses to a literal backslash, and the
     * `*emphasis*` that follows is seen by GfmEmphasis with its full text
     * intact.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $modes = array_column($this->H->calls, 0);
        $this->assertContains(
            'emphasis_open',
            $modes,
            'After \\\\ collapses, the surviving *emphasis* must open emphasis'
        );
    }

    /**
     * `\#` must defeat GfmHeader's column-0 `#` match. The trailing text
     * becomes a normal paragraph instead.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'header',
            $modes,
            'Escaped # must not produce a header'
        );
    }

    /**
     * GFM spec example 313. The whole backtick span is captured by
     * GfmBacktickSingle in one regex shot, so GfmEscape never runs on its
     * body. The body must retain the literal backslashes.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        $unformatted = array_filter($this->H->calls, static fn($c) => $c[0] === 'unformatted');
        $bodies = array_map(static fn($c) => $c[1][0], $unformatted);
        $this->assertContains(
            '\\[\\',
            $bodies,
            'Backtick span body must preserve the literal backslashes'
        );
    }

    /**
     * The mode's sort value is pinned at 5.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }

    /**
     * In pure `md` mode, `\\` before a newline still escapes to a literal
     * backslash per GFM §6.1 — no DW Linebreak is loaded to defer to.
     */
    public function testDoubleBackslashBeforeNewlineEscapesInPureMd(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse("foo \\\\\nbar");

        $names = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'linebreak',
            $names,
            'No DW Linebreak is loaded in pure md mode — `\\\\\\n` must stay an escape'
        );

        $this->assertSame(
            "\nfoo \\\nbar",
            $this->joinCdataCalls(),
            '`\\\\` collapses to a literal backslash; the newline survives as cdata'
        );
    }

    /**
     * In any DW-loaded mode (`dw+md` / `md+dw`), `\\` before a space, tab,
     * or newline must defer to DW's Linebreak mode. GfmEscape would
     * otherwise consume those two bytes first (sort 5 vs Linebreak's 140)
     * and the forced linebreak would never fire.
     *
     * @dataProvider provideDwLoadedSyntaxes
     * @param string $syntax value assigned to $conf['syntax'] for this run
     */
    public function testDoubleBackslashBeforeNewlineDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        global $conf;
        $conf['syntax'] = $syntax;

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse("foo\\\\\nbar");

        $names = array_column($this->H->calls, 0);
        $this->assertContains(
            'linebreak',
            $names,
            "Under $syntax, `\\\\\\\\\\n` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * Same deferral applies for `\\` before a literal space — the
     * canonical DW forced-linebreak form.
     *
     * @dataProvider provideDwLoadedSyntaxes
     * @param string $syntax value assigned to $conf['syntax'] for this run
     */
    public function testDoubleBackslashBeforeSpaceDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        global $conf;
        $conf['syntax'] = $syntax;

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('foo \\\\ bar');

        $names = array_column($this->H->calls, 0);
        $this->assertContains(
            'linebreak',
            $names,
            "Under $syntax, `\\\\\\\\ ` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * The deferral is narrow: `\\` followed by non-whitespace still
     * escapes to a literal backslash, even with DW Linebreak loaded.
     * A UNC-style path typed as two GFM escapes back-to-back
     * (`\\\\host\\share`) must therefore come out with each pair collapsed,
     * not as surprising literal double-backslashes.
     *
     * @dataProvider provideDwLoadedSyntaxes
     * @param string $syntax value assigned to $conf['syntax'] for this run
     */
    public function testMidLineDoubleBackslashStillEscapesWhenDwLoaded(string $syntax): void
    {
        global $conf;
        $conf['syntax'] = $syntax;

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('\\\\\\\\host\\\\share');

        $names = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'linebreak',
            $names,
            'Mid-line `\\\\` (no EOL whitespace) must not fire a linebreak'
        );

        $this->assertSame(
            "\n\\\\host\\share",
            $this->joinCdataCalls(),
            'Each `\\\\` collapses to a single literal backslash, GFM-style'
        );
    }

    /**
     * Syntax settings in which DokuWiki's own modes are loaded alongside
     * Markdown.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideDwLoadedSyntaxes(): array
    {
        return [
            'dw_md' => ['dw+md'],
            'md_dw' => ['md+dw'],
        ];
    }
}