<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ParserMode\GfmHtmlEntity;
use dokuwiki\Utf8\Unicode;

/**
 * Tests for the GfmHtmlEntity parser mode (numeric and named character references).
 *
 * Consecutive cdata calls are coalesced by Handler\Block::addCall during
 * finalize(), so a successful match shows up as a single cdata containing
 * the decoded character spliced into the surrounding text. Non-matching
 * inputs leave the literal `&...;` bytes in the cdata.
 *
 * Every input is written as a single-quoted string holding the raw entity
 * reference (e.g. `&#35;`), never the already-decoded character; otherwise
 * the mode under test would have nothing to match.
 */
class GfmHtmlEntityTest extends ParserTestBase
{
    /**
     * Parse $input with only the gfm_html_entity mode added and assert that the
     * resulting call list is one paragraph holding one cdata call.
     *
     * @param string $input         wiki text containing the entity reference(s) under test
     * @param string $expectedCdata expected cdata payload; every caller in this class
     *                              starts it with "\n"
     */
    private function assertParsedCdata(string $input, string $expectedCdata): void
    {
        $this->P->addMode('gfm_html_entity', new GfmHtmlEntity());
        $this->P->parse($input);
        $this->assertCalls([
            ['document_start', []],
            ['p_open', []],
            ['cdata', [$expectedCdata]],
            ['p_close', []],
            ['document_end', []],
        ], $this->H->calls);
    }

    // ----- numeric references that decode -----

    public function testDecimalAscii(): void
    {
        $this->assertParsedCdata('x &#35; y', "\nx # y");
    }

    public function testDecimalMultibyte(): void
    {
        // 1234 decimal is U+04D2
        $this->assertParsedCdata('a&#1234;b', "\na\u{04D2}b");
    }

    public function testHexLowercase(): void
    {
        $this->assertParsedCdata('a&#xcab;b', "\na\u{0CAB}b");
    }

    public function testHexUppercase(): void
    {
        // upper-case "X" marker and upper-case hex digits
        $this->assertParsedCdata('a&#XD06;b', "\na\u{0D06}b");
    }

    public function testHexQuoteCharacter(): void
    {
        $this->assertParsedCdata('a&#x22;b', "\na\"b");
    }

    public function testZeroMapsToReplacement(): void
    {
        $this->assertParsedCdata('a&#0;b', "\na\u{FFFD}b");
    }

    public function testSurrogateMapsToReplacement(): void
    {
        // U+D800 is the first surrogate code point
        $this->assertParsedCdata('a&#xD800;b', "\na\u{FFFD}b");
    }

    public function testMaxValidCodepoint(): void
    {
        $this->assertParsedCdata('a&#x10FFFF;b', "\na" . Unicode::toUtf8([0x10FFFF]) . 'b');
    }

    // ----- numeric look-alikes that must stay literal -----

    public function testNonEntityTooManyDecimalDigitsStaysLiteral(): void
    {
        // eight decimal digits
        $this->assertParsedCdata('a&#87654321;b', "\na&#87654321;b");
    }

    public function testNonEntityHexLetterAfterAmpStaysLiteral(): void
    {
        // hex digits without the "x" marker
        $this->assertParsedCdata('a&#abcdef0;b', "\na&#abcdef0;b");
    }

    public function testEmptyEntityStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#;b', "\na&#;b");
    }

    public function testMissingSemicolonStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#35 b', "\na&#35 b");
    }

    // ----- sequences and whitespace -----

    public function testMultipleNumericEntitiesInSequence(): void
    {
        $this->assertParsedCdata('&#35;&#1234;&#xcab;', "\n#\u{04D2}\u{0CAB}");
    }

    public function testTabDecodes(): void
    {
        $this->assertParsedCdata('a&#9;b', "\na\tb");
    }

    public function testNewlineDecodes(): void
    {
        $this->assertParsedCdata('foo&#10;&#10;bar', "\nfoo\n\nbar");
    }

    // ----- named references -----

    public function testNamedAmp(): void
    {
        // &amp; decodes to '&', renderer re-escapes on output
        $this->assertParsedCdata('a&amp;b', "\na&b");
    }

    public function testNamedCopy(): void
    {
        $this->assertParsedCdata('a&copy;b', "\na\u{00A9}b");
    }

    public function testNamedAElig(): void
    {
        $this->assertParsedCdata('a&AElig;b', "\na\u{00C6}b");
    }

    public function testNamedNbsp(): void
    {
        $this->assertParsedCdata('a&nbsp;b', "\na\u{00A0}b");
    }

    public function testNamedMultiCodepoint(): void
    {
        // &ngE; -> U+2267 + U+0338 (combining solidus)
        $this->assertParsedCdata('a&ngE;b', "\na\u{2267}\u{0338}b");
    }

    public function testNamedUnknownStaysLiteral(): void
    {
        $this->assertParsedCdata('a&MadeUpEntity;b', "\na&MadeUpEntity;b");
    }

    public function testNamedNoSemicolonStaysLiteral(): void
    {
        $this->assertParsedCdata('a&copy b', "\na&copy b");
    }

    public function testMixedNumericAndNamed(): void
    {
        $this->assertParsedCdata('&#35;&copy;&#x22;', "\n#\u{00A9}\"");
    }
}