<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
use dokuwiki\Parsing\ParserMode\GfmEmphasis;
use dokuwiki\Parsing\ParserMode\GfmEscape;
use dokuwiki\Parsing\ParserMode\GfmHeader;
use dokuwiki\Parsing\ParserMode\Linebreak;

/**
 * Tests for the GFM backslash-escape mode.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Select the pure `md` syntax for every test. The DW-loaded tests
     * further down override this per data set via setSyntax().
     */
    public function setUp(): void
    {
        parent::setUp();
        $this->setSyntax('md');
    }

    /**
     * Concatenate the payload of every `cdata` call recorded by the handler.
     *
     * A recorded call is read as [name, args]; the first argument of each
     * call named `cdata` is appended in recording order.
     *
     * @return string all cdata payloads joined without a separator
     */
    private function joinCdataCalls(): string
    {
        $joined = '';
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'cdata') {
                $joined .= $call[1][0];
            }
        }

        return $joined;
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     *
     * @param string $char the punctuation character placed after the backslash
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->joinCdataCalls(),
            "Escaped {$char} must collapse to the literal char in cdata stream"
        );
    }

    /**
     * One data set per ASCII punctuation character, keyed `char_<hex>` so
     * that every character yields a readable, unique data set name.
     *
     * @return array<string, array{string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     *
     * @param string $tail the character placed directly after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->joinCdataCalls());
    }

    /**
     * Characters that must NOT be treated as escapable after a backslash.
     *
     * @return array<string, array{string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit' => ['3'],
            'multibyte' => ['α'],
            'space' => [' '],
            'tab' => ["\t"],
        ];
    }

    /**
     * An escaped backslash (`\\`) collapses to one literal backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        // \\ is the escaped-backslash form. The first char in the match
        // is consumed as the escape introducer; the second is emitted as
        // a literal backslash.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->joinCdataCalls());
    }

    /**
     * An escaped `*` must not be usable as an emphasis opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        // GFM spec example 310 fragment. \* must consume the asterisk
        // before GfmEmphasis can use it as an opener.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'emphasis_open',
            $modes,
            'Escaped opener must not start emphasis'
        );
    }

    /**
     * An escaped backslash directly before `*emphasis*` leaves the
     * emphasis intact.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        // GFM spec example 311. \\ collapses to a literal backslash, and
        // the *emphasis* that follows is now seen by GfmEmphasis with
        // its full text intact.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $modes = array_column($this->H->calls, 0);
        $this->assertContains(
            'emphasis_open',
            $modes,
            'After \\\\ collapses, the surviving *emphasis* must open emphasis'
        );
    }

    /**
     * An escaped `#` at the start of a line must not produce a header.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        // \# must defeat GfmHeader's column-0 # match. The trailing text
        // becomes a normal paragraph instead.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'header',
            $modes,
            'Escaped # must not produce a header'
        );
    }

    /**
     * Backslashes inside a single-backtick code span stay literal.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        // GFM spec example 313. The whole `\[\`` is captured by
        // GfmBacktickSingle in one regex shot, so GfmEscape never runs
        // on its body. The body must retain the literal backslashes.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        $unformatted = array_filter($this->H->calls, static fn($c) => $c[0] === 'unformatted');
        $bodies = array_map(static fn($c) => $c[1][0], $unformatted);
        $this->assertContains(
            '\\[\\',
            $bodies,
            'Backtick span body must preserve the literal backslashes'
        );
    }

    /**
     * The mode reports sort value 5.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }

    /**
     * In pure `md` mode, `\\` before a newline still escapes to a literal
     * backslash per GFM §6.1 — no DW Linebreak is loaded to defer to.
     */
    public function testDoubleBackslashBeforeNewlineEscapesInPureMd(): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse("foo \\\\\nbar");

        $names = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'linebreak',
            $names,
            'No DW Linebreak is loaded in pure md mode — `\\\\\\n` must stay an escape'
        );

        $this->assertSame(
            "\nfoo \\\nbar",
            $this->joinCdataCalls(),
            '`\\\\` collapses to a literal backslash; the newline survives as cdata'
        );
    }

    /**
     * In any DW-loaded mode (`dw+md` / `md+dw`), `\\` before a space, tab,
     * or newline must defer to DW's Linebreak mode. GfmEscape would
     * otherwise consume those two bytes first (sort 5 vs Linebreak's 140)
     * and the forced linebreak would never fire.
     *
     * @dataProvider provideDwLoadedSyntaxes
     *
     * @param string $syntax syntax identifier passed to setSyntax()
     */
    public function testDoubleBackslashBeforeNewlineDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        $this->setSyntax($syntax);

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse("foo\\\\\nbar");

        $names = array_column($this->H->calls, 0);
        $this->assertContains(
            'linebreak',
            $names,
            "Under $syntax, `\\\\\\\\\\n` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * Same deferral applies for `\\` before a literal space — the
     * canonical DW forced-linebreak form.
     *
     * @dataProvider provideDwLoadedSyntaxes
     *
     * @param string $syntax syntax identifier passed to setSyntax()
     */
    public function testDoubleBackslashBeforeSpaceDefersToLinebreakWhenDwLoaded(string $syntax): void
    {
        $this->setSyntax($syntax);

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('foo \\\\ bar');

        $names = array_column($this->H->calls, 0);
        $this->assertContains(
            'linebreak',
            $names,
            "Under $syntax, `\\\\\\\\ ` must yield a DW linebreak instead of an escape"
        );
    }

    /**
     * The deferral is narrow: `\\` followed by non-whitespace still
     * escapes to a literal backslash, even with DW Linebreak loaded.
     * Otherwise a UNC-style path typed as `\\\\host\\share` — GFM escapes
     * written back-to-back — would surprise the user by keeping its
     * literal double backslashes.
     *
     * @dataProvider provideDwLoadedSyntaxes
     *
     * @param string $syntax syntax identifier passed to setSyntax()
     */
    public function testMidLineDoubleBackslashStillEscapesWhenDwLoaded(string $syntax): void
    {
        $this->setSyntax($syntax);

        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('linebreak', new Linebreak());
        $this->P->parse('\\\\\\\\host\\\\share');

        $names = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'linebreak',
            $names,
            'Mid-line `\\\\` (no EOL whitespace) must not fire a linebreak'
        );

        $this->assertSame(
            "\n\\\\host\\share",
            $this->joinCdataCalls(),
            'Each `\\\\` collapses to a single literal backslash, GFM-style'
        );
    }

    /**
     * Syntax settings used by the DW-loaded tests.
     *
     * @return array<string, array{string}>
     */
    public static function provideDwLoadedSyntaxes(): array
    {
        return [
            'dw_md' => ['dw+md'],
            'md_dw' => ['md+dw'],
        ];
    }
}