<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ModeRegistry;
use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
use dokuwiki\Parsing\ParserMode\GfmEmphasis;
use dokuwiki\Parsing\ParserMode\GfmEscape;
use dokuwiki\Parsing\ParserMode\GfmHeader;

/**
 * Tests for the GFM backslash-escape mode.
 *
 * Each test registers GfmEscape (optionally next to one competing mode) on
 * the parser in $this->P, parses a short snippet, and inspects the calls the
 * handler in $this->H recorded.
 * NOTE(review): $this->P and $this->H are presumably created by
 * ParserTestBase::setUp() — confirm against the base class.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Select the 'md' syntax in the global config and reset the mode registry
     * before every test.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $conf['syntax'] = 'md';
        ModeRegistry::reset();
    }

    /**
     * Reset the mode registry again so state does not carry over to other tests.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     *
     * @param string $char a single ASCII punctuation character
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->collectCdataText(),
            "Escaped {$char} must collapse to the literal char in cdata stream"
        );
    }

    /**
     * One named data set per ASCII punctuation character.
     *
     * Data set names are 'char_' followed by the hex code of the character,
     * so they stay readable even for characters like quotes or backslashes.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     *
     * @param string $tail the character placed directly after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->collectCdataText());
    }

    /**
     * Characters that must NOT be treated as escapable after a backslash.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit' => ['3'],
            'multibyte' => ['α'],
            'space' => [' '],
            'tab' => ["\t"],
        ];
    }

    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        // \\ is the escaped-backslash form. The first char in the match
        // is consumed as the escape introducer; the second is emitted as
        // a literal backslash.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->collectCdataText());
    }

    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        // GFM spec example 310 fragment. \* must consume the asterisk
        // before GfmEmphasis can use it as an opener.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $this->assertNotContains(
            'emphasis_open',
            $this->recordedCallNames(),
            'Escaped opener must not start emphasis'
        );
    }

    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        // GFM spec example 311. \\ collapses to a literal backslash, and
        // the *emphasis* that follows is now seen by GfmEmphasis with
        // its full text intact.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $this->assertContains(
            'emphasis_open',
            $this->recordedCallNames(),
            'After \\\\ collapses, the surviving *emphasis* must open emphasis'
        );
    }

    public function testEscapedHashBlocksHeader(): void
    {
        // \# must defeat GfmHeader's column-0 # match. The trailing text
        // becomes a normal paragraph instead.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $this->assertNotContains(
            'header',
            $this->recordedCallNames(),
            'Escaped # must not produce a header'
        );
    }

    public function testNoEscapeInsideBacktickSpan(): void
    {
        // GFM spec example 313. The whole `\[\`` is captured by
        // GfmBacktickSingle in one regex shot, so GfmEscape never runs
        // on its body. The body must retain the literal backslashes.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        $unformatted = array_filter($this->H->calls, static fn($c) => $c[0] === 'unformatted');
        $bodies = array_map(static fn($c) => $c[1][0], $unformatted);
        $this->assertContains(
            '\\[\\',
            $bodies,
            'Backtick span body must preserve the literal backslashes'
        );
    }

    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }

    /**
     * Concatenate the first argument of every 'cdata' call the handler
     * recorded, in recording order.
     *
     * @return string the joined cdata text ('' when no cdata call was recorded)
     */
    private function collectCdataText(): string
    {
        $text = '';
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'cdata') {
                $text .= $call[1][0];
            }
        }
        return $text;
    }

    /**
     * List the name (element 0) of every call the handler recorded.
     *
     * @return array the call names in recording order
     */
    private function recordedCallNames(): array
    {
        return array_column($this->H->calls, 0);
    }
}