<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ModeRegistry;
use dokuwiki\Parsing\ParserMode\GfmBacktickSingle;
use dokuwiki\Parsing\ParserMode\GfmEmphasis;
use dokuwiki\Parsing\ParserMode\GfmEscape;
use dokuwiki\Parsing\ParserMode\GfmHeader;

/**
 * Tests for the GFM backslash-escape mode.
 */
class GfmEscapeTest extends ParserTestBase
{
    /**
     * Switch the `syntax` config to 'md' and start each test from a
     * freshly reset ModeRegistry.
     */
    public function setUp(): void
    {
        parent::setUp();
        global $conf;
        $conf['syntax'] = 'md';
        ModeRegistry::reset();
    }

    /**
     * Reset the ModeRegistry again so state set up here does not leak
     * into other test classes.
     */
    public function tearDown(): void
    {
        ModeRegistry::reset();
        parent::tearDown();
    }

    /**
     * Concatenate the payload of every `cdata` call the handler recorded,
     * in call order.
     *
     * @return string the joined first argument of all recorded cdata calls
     */
    private function collectCdataText(): string
    {
        $text = '';
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'cdata') {
                $text .= $call[1][0];
            }
        }
        return $text;
    }

    /**
     * Every ASCII punctuation character is escapable per GFM §6.1.
     *
     * @dataProvider provideEscapableChars
     * @param string $char the single punctuation character placed after the backslash
     */
    public function testEscapableAsciiPunctuationProducesLiteral(string $char): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\' . $char . ' bar');

        $this->assertSame(
            "\nfoo " . $char . ' bar',
            $this->collectCdataText(),
            "Escaped {$char} must collapse to the literal char in cdata stream",
        );
    }

    /**
     * One dataset per ASCII punctuation character, keyed `char_<hex>` so
     * failures identify the character even when it is hard to read.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideEscapableChars(): array
    {
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn($c) => 'char_' . bin2hex($c), $chars),
            array_map(static fn($c) => [$c], $chars),
        );
    }

    /**
     * Backslash before non-ASCII-punctuation stays literal — letters,
     * digits, multibyte chars, spaces, and tabs are not escapable. The
     * pattern simply doesn't match, so the bytes flow through as cdata.
     *
     * @dataProvider provideNonEscapableChars
     * @param string $tail the character placed directly after the backslash
     */
    public function testNonEscapableCharsKeepBackslash(string $tail): void
    {
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('a \\' . $tail . ' b');

        $this->assertSame("\na \\" . $tail . ' b', $this->collectCdataText());
    }

    /**
     * Characters that must NOT be treated as escapable.
     *
     * @return array<string, array{0: string}>
     */
    public static function provideNonEscapableChars(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit'        => ['3'],
            'multibyte'    => ['α'],
            'space'        => [' '],
            'tab'          => ["\t"],
        ];
    }

    /**
     * `\\` is the escaped-backslash form and must yield one literal backslash.
     */
    public function testDoubleBackslashCollapsesToSingleBackslash(): void
    {
        // The first char in the match is consumed as the escape
        // introducer; the second is emitted as a literal backslash.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->parse('foo \\\\ bar');

        $this->assertSame("\nfoo \\ bar", $this->collectCdataText());
    }

    /**
     * An escaped `*` must not be usable as an emphasis opener.
     */
    public function testEscapedAsteriskBlocksEmphasis(): void
    {
        // GFM spec example 310 fragment. \* must consume the asterisk
        // before GfmEmphasis can use it as an opener.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\*not emphasized*');

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'emphasis_open',
            $modes,
            'Escaped opener must not start emphasis',
        );
    }

    /**
     * An escaped backslash must not swallow the emphasis that follows it.
     */
    public function testEscapedBackslashThenEmphasisOpens(): void
    {
        // GFM spec example 311. \\ collapses to a literal backslash, and
        // the *emphasis* that follows is now seen by GfmEmphasis with
        // its full text intact.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_emphasis', new GfmEmphasis());
        $this->P->parse('\\\\*emphasis*');

        $modes = array_column($this->H->calls, 0);
        $this->assertContains(
            'emphasis_open',
            $modes,
            'After \\\\ collapses, the surviving *emphasis* must open emphasis',
        );
    }

    /**
     * An escaped `#` at the start of a line must not produce a header.
     */
    public function testEscapedHashBlocksHeader(): void
    {
        // \# must defeat GfmHeader's column-0 # match. The trailing text
        // becomes a normal paragraph instead.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_header', new GfmHeader());
        $this->P->parse("\\# not a heading");

        $modes = array_column($this->H->calls, 0);
        $this->assertNotContains(
            'header',
            $modes,
            'Escaped # must not produce a header',
        );
    }

    /**
     * Backslashes inside a backtick span are kept verbatim.
     */
    public function testNoEscapeInsideBacktickSpan(): void
    {
        // GFM spec example 313. The whole `\[\`` is captured by
        // GfmBacktickSingle in one regex shot, so GfmEscape never runs
        // on its body. The body must retain the literal backslashes.
        $this->P->addMode('gfm_escape', new GfmEscape());
        $this->P->addMode('gfm_backtick_single', new GfmBacktickSingle());
        $this->P->parse('`\\[\\`');

        // Collect the payload of every 'unformatted' call; key order is
        // irrelevant because assertContains only checks membership.
        $bodies = [];
        foreach ($this->H->calls as $call) {
            if ($call[0] === 'unformatted') {
                $bodies[] = $call[1][0];
            }
        }

        $this->assertContains(
            '\\[\\',
            $bodies,
            'Backtick span body must preserve the literal backslashes',
        );
    }

    /**
     * Pin the mode's sort value.
     */
    public function testSortValue(): void
    {
        $mode = new GfmEscape();
        $this->assertSame(5, $mode->getSort());
    }
}