<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ModeRegistry;
use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
use dokuwiki\Parsing\ParserMode\GfmEmphasis;
use dokuwiki\Parsing\ParserMode\GfmEscape;
use dokuwiki\Parsing\ParserMode\GfmHeader;

/**
 * Tests for the GFM backslash-escape mode.
 *
 * Each test registers GfmEscape (optionally together with one competing
 * mode) on the parser in $this->P, parses a short snippet and inspects the
 * calls recorded in $this->H->calls.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Switch the wiki syntax setting to markdown and reset the mode registry
     * before every test.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $conf['syntax'] = 'markdown';
        ModeRegistry::reset();
    }

    /**
     * Reset the mode registry again so state from this test does not carry
     * over into the next one.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Concatenate the text of all recorded cdata calls.
     *
     * Every entry of $this->H->calls is read as [name, args, ...]; only
     * entries whose name is 'cdata' are kept and their first argument is
     * appended to the result.
     *
     * @return string the first argument of every 'cdata' call, in call order
     */
    private function collectCdataText(): string
    {
        $text = '';
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'cdata') {
                $text .= $call[1][0];
            }
        }
        return $text;
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     * @param string $char the single punctuation character placed after the backslash
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $joined = $this->collectCdataText();

        $this->assertSame("\nfoo " . $char . ' bar', $joined,
            "Escaped {$char} must collapse to the literal char in cdata stream");
    }

    /**
     * Data sets for the 32 ASCII punctuation characters, one per set.
     *
     * Keys are 'char_' followed by the character's hex code so that every
     * data set has a readable, unique name.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     * @param string $tail the character placed directly after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $joined = $this->collectCdataText();

        $this->assertSame("\na \\" . $tail . ' b', $joined);
    }

    /**
     * Data sets of characters that must not be treated as escapable.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit' => ['3'],
            'multibyte' => ['α'],
            'space' => [' '],
            'tab' => ["\t"],
        ];
    }

    /**
     * An escaped backslash must come out as exactly one literal backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        // \\ is the escaped-backslash form. The first char in the match
        // is consumed as the escape introducer; the second is emitted as
        // a literal backslash.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $joined = $this->collectCdataText();

        $this->assertSame("\nfoo \\ bar", $joined);
    }

    /**
     * An escaped asterisk must not open emphasis.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        // GFM spec example 310 fragment. \* must consume the asterisk
        // before GfmEmphasis can use it as an opener.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        // Names of all recorded calls, in order.
        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains('emphasis_open', $modes,
            'Escaped opener must not start emphasis');
    }

    /**
     * An escaped backslash directly before *emphasis* must leave the
     * emphasis intact.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        // GFM spec example 311. \\ collapses to a literal backslash, and
        // the *emphasis* that follows is now seen by GfmEmphasis with
        // its full text intact.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        // Names of all recorded calls, in order.
        $modes = array_column($this->H->calls, 0);
        $this->assertContains('emphasis_open', $modes,
            'After \\\\ collapses, the surviving *emphasis* must open emphasis');
    }

    /**
     * An escaped hash at the start of a line must not produce a header.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        // \# must defeat GfmHeader's column-0 # match. The trailing text
        // becomes a normal paragraph instead.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        // Names of all recorded calls, in order.
        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains('header', $modes,
            'Escaped # must not produce a header');
    }

    /**
     * Backslashes inside a backtick span must be kept verbatim.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        // GFM spec example 313. The whole `\[\`` is captured by
        // GfmBacktickSingle in one regex shot, so GfmEscape never runs
        // on its body. The body must retain the literal backslashes.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        // First argument of every recorded 'unformatted' call.
        $unformatted = array_filter($this->H->calls, static fn($c) => $c[0] === 'unformatted');
        $bodies = array_map(static fn($c) => $c[1][0], $unformatted);
        $this->assertContains('\\[\\', $bodies,
            'Backtick span body must preserve the literal backslashes');
    }

    /**
     * The mode must report sort value 5.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }
}