<?php

namespace dokuwiki\test\Parsing\Helpers;

use dokuwiki\Parsing\Helpers\HtmlEntity;
use dokuwiki\Utf8\Unicode;

/**
 * Tests for the HTML entity-decoding post-hoc helper.
 *
 * The lexer-mode coverage is in {@see \dokuwiki\test\Parsing\ParserMode\GfmHtmlEntityTest};
 * this class exercises the helper that GfmLink and GfmCode call on text
 * the lexer never reached.
 *
 * Every input is spelled as an escaped entity reference (e.g. `&#35;`), never as
 * the already-decoded character: a literal character on both sides of the
 * assertion would pass even if decode() returned its argument untouched.
 */
class HtmlEntityTest extends \DokuWikiTest
{
    /** Decimal reference to an ASCII code point (35 = '#'). */
    public function testDecimalDecodes()
    {
        $this->assertSame('a#b', HtmlEntity::decode('a&#35;b'));
    }

    /** Decimal reference whose result needs more than one UTF-8 byte (1234 = U+04D2). */
    public function testDecimalMultibyte()
    {
        $this->assertSame("a\u{04D2}b", HtmlEntity::decode('a&#1234;b'));
    }

    /** Hex reference written with a lowercase `x` marker and lowercase digits. */
    public function testHexLowercase()
    {
        $this->assertSame("a\u{0CAB}b", HtmlEntity::decode('a&#xcab;b'));
    }

    /** Hex reference written with an uppercase `X` marker and uppercase digits. */
    public function testHexUppercase()
    {
        $this->assertSame("a\u{0D06}b", HtmlEntity::decode('a&#XD06;b'));
    }

    /** Code point zero is replaced by U+FFFD instead of emitting a NUL byte. */
    public function testZeroMapsToReplacement()
    {
        $this->assertSame("a\u{FFFD}b", HtmlEntity::decode('a&#0;b'));
    }

    /** A surrogate code point (U+D800) is replaced by U+FFFD. */
    public function testSurrogateMapsToReplacement()
    {
        $this->assertSame("a\u{FFFD}b", HtmlEntity::decode('a&#xD800;b'));
    }

    /** The first value past the Unicode range (0x110000) is replaced by U+FFFD. */
    public function testOverflowMapsToReplacement()
    {
        $this->assertSame("a\u{FFFD}b", HtmlEntity::decode('a&#x110000;b'));
    }

    /** The highest valid code point (U+10FFFF) decodes to itself, not to U+FFFD. */
    public function testMaxValidCodepoint()
    {
        $this->assertSame(
            'a' . Unicode::toUtf8([0x10FFFF]) . 'b',
            HtmlEntity::decode('a&#x10FFFF;b')
        );
    }

    public function testNamedAmp()
    {
        $this->assertSame('a&b', HtmlEntity::decode('a&amp;b'));
    }

    public function testNamedCopy()
    {
        $this->assertSame("a\u{00A9}b", HtmlEntity::decode('a&copy;b'));
    }

    /** Entity names are case-sensitive; this one mixes upper and lower case. */
    public function testNamedAElig()
    {
        $this->assertSame("a\u{00C6}b", HtmlEntity::decode('a&AElig;b'));
    }

    public function testNamedNbsp()
    {
        $this->assertSame("a\u{00A0}b", HtmlEntity::decode('a&nbsp;b'));
    }

    public function testNamedMultiCodepoint()
    {
        // &ngE; -> U+2267 + U+0338 (combining long solidus overlay)
        $this->assertSame("a\u{2267}\u{0338}b", HtmlEntity::decode('a&ngE;b'));
    }

    public function testUnknownNameStaysLiteral()
    {
        $this->assertSame('a&MadeUpEntity;b', HtmlEntity::decode('a&MadeUpEntity;b'));
    }

    /** A known name without its terminating semicolon is not treated as an entity. */
    public function testNoSemicolonStaysLiteral()
    {
        $this->assertSame('a&copy b', HtmlEntity::decode('a&copy b'));
    }

    /** A decimal reference with more digits than the helper accepts is left untouched. */
    public function testTooManyDecimalDigitsStaysLiteral()
    {
        $this->assertSame('a&#87654321;b', HtmlEntity::decode('a&#87654321;b'));
    }

    /** Hex digits directly after `&#` (no `x` marker) do not form a numeric reference. */
    public function testHexLetterAfterAmpStaysLiteral()
    {
        $this->assertSame('a&#abcdef0;b', HtmlEntity::decode('a&#abcdef0;b'));
    }

    public function testEmptyEntityStaysLiteral()
    {
        $this->assertSame('a&#;b', HtmlEntity::decode('a&#;b'));
    }

    /** Adjacent references with no separating text are each decoded. */
    public function testMultipleEntitiesInSequence()
    {
        $this->assertSame(
            "#\u{04D2}\u{00A9}",
            HtmlEntity::decode('&#35;&#1234;&copy;')
        );
    }

    public function testNonEntityBytesPassThrough()
    {
        $this->assertSame('plain text without entities', HtmlEntity::decode('plain text without entities'));
    }

    public function testEmptyInput()
    {
        $this->assertSame('', HtmlEntity::decode(''));
    }
}