<?php

namespace dokuwiki\test\Parsing\ParserMode;

use dokuwiki\Parsing\ParserMode\GfmHtmlEntity;
use dokuwiki\Utf8\Unicode;

/**
 * Tests for the GfmHtmlEntity parser mode (numeric and named character references).
 *
 * Consecutive cdata calls are coalesced by Handler\Block::addCall during
 * finalize(), so a successful match shows up as a single cdata containing
 * the decoded character spliced into the surrounding text. Non-matching
 * inputs leave the literal `&...;` bytes in the cdata.
 *
 * Inputs are written as single-quoted strings holding the raw entity
 * reference; expectations spell the decoded character with `\u{...}` escapes
 * so that input and expectation can never be confused with one another.
 */
class GfmHtmlEntityTest extends ParserTestBase
{
    /**
     * Parse $input with only the gfm_html_entity mode added and assert that the
     * handler recorded exactly one paragraph wrapping a single cdata call.
     *
     * @param string $input         raw wiki text handed to the parser
     * @param string $expectedCdata exact payload expected in the single cdata call
     *                              (every expectation in this class starts with "\n")
     */
    private function assertParsedCdata(string $input, string $expectedCdata): void
    {
        $this->P->addMode('gfm_html_entity', new GfmHtmlEntity());
        $this->P->parse($input);

        $expectedCalls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', [$expectedCdata]],
            ['p_close', []],
            ['document_end', []],
        ];

        $this->assertCalls($expectedCalls, $this->H->calls);
    }

    /** A decimal reference to an ASCII character is decoded. */
    public function testDecimalAscii(): void
    {
        $this->assertParsedCdata('x &#35; y', "\nx # y");
    }

    /** A decimal reference to a multibyte character is decoded (1234 = U+04D2). */
    public function testDecimalMultibyte(): void
    {
        $this->assertParsedCdata('a&#1234;b', "\na\u{04D2}b");
    }

    /** Hexadecimal reference written with lowercase `x` and lowercase digits. */
    public function testHexLowercase(): void
    {
        $this->assertParsedCdata('a&#xcab;b', "\na\u{0CAB}b");
    }

    /** Hexadecimal reference written with uppercase `X` and uppercase digits. */
    public function testHexUppercase(): void
    {
        $this->assertParsedCdata('a&#XD06;b', "\na\u{0D06}b");
    }

    /** Hexadecimal reference to the double quote character (U+0022). */
    public function testHexQuoteCharacter(): void
    {
        $this->assertParsedCdata('a&#x22;b', "\na\"b");
    }

    /** Code point zero is replaced by U+FFFD REPLACEMENT CHARACTER. */
    public function testZeroMapsToReplacement(): void
    {
        $this->assertParsedCdata('a&#0;b', "\na\u{FFFD}b");
    }

    /** A surrogate code point is replaced by U+FFFD REPLACEMENT CHARACTER. */
    public function testSurrogateMapsToReplacement(): void
    {
        // NOTE(review): any code point in U+D800..U+DFFF exercises this path;
        // U+D800 (the first surrogate) is used here.
        $this->assertParsedCdata('a&#xD800;b', "\na\u{FFFD}b");
    }

    /** The highest Unicode code point, U+10FFFF, is still decoded. */
    public function testMaxValidCodepoint(): void
    {
        $this->assertParsedCdata('a&#x10FFFF;b', "\na" . Unicode::toUtf8([0x10FFFF]) . 'b');
    }

    /** Eight decimal digits exceed the allowed length, so the text is left untouched. */
    public function testNonEntityTooManyDecimalDigitsStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#87654321;b', "\na&#87654321;b");
    }

    /** Hex letters without the `x` marker are not a valid decimal reference. */
    public function testNonEntityHexLetterAfterAmpStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#abcdef0;b', "\na&#abcdef0;b");
    }

    /** `&#;` carries no digits and is left untouched. */
    public function testEmptyEntityStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#;b', "\na&#;b");
    }

    /** A numeric reference without the terminating semicolon is left untouched. */
    public function testMissingSemicolonStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#35 b', "\na&#35 b");
    }

    /** Several adjacent numeric references are each decoded. */
    public function testMultipleNumericEntitiesInSequence(): void
    {
        $this->assertParsedCdata('&#35;&#1234;&#xcab;', "\n#\u{04D2}\u{0CAB}");
    }

    /** `&#9;` decodes to a horizontal tab. */
    public function testTabDecodes(): void
    {
        $this->assertParsedCdata('a&#9;b', "\na\tb");
    }

    /** `&#10;` decodes to a line feed; two of them yield two line feeds in the cdata. */
    public function testNewlineDecodes(): void
    {
        $this->assertParsedCdata('foo&#10;&#10;bar', "\nfoo\n\nbar");
    }

    /** Named reference for the ampersand itself. */
    public function testNamedAmp(): void
    {
        // &amp; decodes to '&', renderer re-escapes on output
        $this->assertParsedCdata('a&amp;b', "\na&b");
    }

    /** Named reference `&copy;` decodes to U+00A9. */
    public function testNamedCopy(): void
    {
        $this->assertParsedCdata('a&copy;b', "\na\u{00A9}b");
    }

    /** Named reference with mixed case, `&AElig;`, decodes to U+00C6. */
    public function testNamedAElig(): void
    {
        $this->assertParsedCdata('a&AElig;b', "\na\u{00C6}b");
    }

    /** Named reference `&nbsp;` decodes to U+00A0 NO-BREAK SPACE. */
    public function testNamedNbsp(): void
    {
        $this->assertParsedCdata('a&nbsp;b', "\na\u{00A0}b");
    }

    /** A named reference may expand to more than one code point. */
    public function testNamedMultiCodepoint(): void
    {
        // &ngE; -> U+2267 + U+0338 (combining solidus)
        $this->assertParsedCdata('a&ngE;b', "\na\u{2267}\u{0338}b");
    }

    /** An unknown entity name is left untouched. */
    public function testNamedUnknownStaysLiteral(): void
    {
        $this->assertParsedCdata('a&MadeUpEntity;b', "\na&MadeUpEntity;b");
    }

    /** A known entity name without the terminating semicolon is left untouched. */
    public function testNamedNoSemicolonStaysLiteral(): void
    {
        $this->assertParsedCdata('a&copy b', "\na&copy b");
    }

    /** Numeric and named references can be mixed back to back. */
    public function testMixedNumericAndNamed(): void
    {
        $this->assertParsedCdata('&#35;&copy;&#x22;', "\n#\u{00A9}\"");
    }
}