<?php

namespace dokuwiki\test\Parsing\Helpers;

use dokuwiki\Parsing\Helpers\HtmlEntity;
use dokuwiki\Utf8\Unicode;

/**
 * Tests for the HTML entity-decoding post-hoc helper.
 *
 * The lexer-mode coverage is in {@see \dokuwiki\test\Parsing\ParserMode\GfmHtmlEntityTest};
 * this class exercises the helper that GfmLink and GfmCode call on text
 * the lexer never reached.
 *
 * Every input below is written as a raw entity reference inside a
 * single-quoted string so that the decoder (and not the PHP parser or any
 * tooling that renders this file) is what turns it into a character.
 */
class HtmlEntityTest extends \DokuWikiTest
{
    /**
     * A decimal numeric reference decodes to the matching ASCII character.
     */
    public function testDecimalDecodes()
    {
        $this->assertSame('a#b', HtmlEntity::decode('a&#35;b'));
    }

    /**
     * A decimal numeric reference above U+007F decodes to a multibyte character.
     */
    public function testDecimalMultibyte()
    {
        // 1234 decimal == 0x04D2
        $this->assertSame("a\u{04D2}b", HtmlEntity::decode('a&#1234;b'));
    }

    /**
     * A hexadecimal reference written in lowercase decodes.
     */
    public function testHexLowercase()
    {
        $this->assertSame("a\u{0CAB}b", HtmlEntity::decode('a&#xcab;b'));
    }

    /**
     * A hexadecimal reference written in uppercase decodes.
     */
    public function testHexUppercase()
    {
        $this->assertSame("a\u{0D06}b", HtmlEntity::decode('a&#XD06;b'));
    }

    /**
     * Code point zero is replaced by U+FFFD.
     */
    public function testZeroMapsToReplacement()
    {
        $this->assertSame("a\u{FFFD}b", HtmlEntity::decode('a&#0;b'));
    }

    /**
     * A code point in the surrogate range is replaced by U+FFFD.
     */
    public function testSurrogateMapsToReplacement()
    {
        // NOTE(review): exact surrogate value reconstructed; any of U+D800..U+DFFF fits the test name
        $this->assertSame("a\u{FFFD}b", HtmlEntity::decode('a&#xD800;b'));
    }

    /**
     * A code point above U+10FFFF is replaced by U+FFFD.
     */
    public function testOverflowMapsToReplacement()
    {
        // NOTE(review): exact overflow value reconstructed; 0x110000 is the first code point out of range
        $this->assertSame("a\u{FFFD}b", HtmlEntity::decode('a&#x110000;b'));
    }

    /**
     * The highest valid code point, U+10FFFF, decodes normally.
     */
    public function testMaxValidCodepoint()
    {
        // NOTE(review): hex vs. decimal spelling of the input reconstructed from the expected value
        $this->assertSame(
            'a' . Unicode::toUtf8([0x10FFFF]) . 'b',
            HtmlEntity::decode('a&#x10FFFF;b')
        );
    }

    /**
     * The named reference for the ampersand decodes.
     */
    public function testNamedAmp()
    {
        $this->assertSame('a&b', HtmlEntity::decode('a&amp;b'));
    }

    /**
     * The named reference for the copyright sign decodes.
     */
    public function testNamedCopy()
    {
        $this->assertSame("a\u{00A9}b", HtmlEntity::decode('a&copy;b'));
    }

    /**
     * A mixed-case named reference (AElig) decodes.
     */
    public function testNamedAElig()
    {
        $this->assertSame("a\u{00C6}b", HtmlEntity::decode('a&AElig;b'));
    }

    /**
     * The named reference for the no-break space decodes.
     */
    public function testNamedNbsp()
    {
        $this->assertSame("a\u{00A0}b", HtmlEntity::decode('a&nbsp;b'));
    }

    /**
     * A named reference that expands to two code points decodes to both.
     */
    public function testNamedMultiCodepoint()
    {
        // &ngE; -> U+2267 + U+0338 (combining long solidus overlay)
        $this->assertSame("a\u{2267}\u{0338}b", HtmlEntity::decode('a&ngE;b'));
    }

    /**
     * An unknown entity name is passed through untouched.
     */
    public function testUnknownNameStaysLiteral()
    {
        $this->assertSame('a&MadeUpEntity;b', HtmlEntity::decode('a&MadeUpEntity;b'));
    }

    /**
     * A known name without the terminating semicolon is passed through untouched.
     */
    public function testNoSemicolonStaysLiteral()
    {
        $this->assertSame('a&copy b', HtmlEntity::decode('a&copy b'));
    }

    /**
     * A decimal reference with too many digits is passed through untouched.
     */
    public function testTooManyDecimalDigitsStaysLiteral()
    {
        // NOTE(review): exact digit string reconstructed; eight digits exceeds the seven allowed by CommonMark/GFM
        $this->assertSame('a&#87654321;b', HtmlEntity::decode('a&#87654321;b'));
    }

    /**
     * Hex digits directly after "&#" (no "x" marker) are passed through untouched.
     */
    public function testHexLetterAfterAmpStaysLiteral()
    {
        $this->assertSame('a&#abcdef0;b', HtmlEntity::decode('a&#abcdef0;b'));
    }

    /**
     * A numeric reference with no digits is passed through untouched.
     */
    public function testEmptyEntityStaysLiteral()
    {
        $this->assertSame('a&#;b', HtmlEntity::decode('a&#;b'));
    }

    /**
     * Several adjacent references are each decoded.
     */
    public function testMultipleEntitiesInSequence()
    {
        $this->assertSame(
            "#\u{04D2}\u{00A9}",
            HtmlEntity::decode('&#35;&#1234;&copy;')
        );
    }

    /**
     * Text containing no references is returned unchanged.
     */
    public function testNonEntityBytesPassThrough()
    {
        $this->assertSame('plain text without entities', HtmlEntity::decode('plain text without entities'));
    }

    /**
     * The empty string is returned unchanged.
     */
    public function testEmptyInput()
    {
        $this->assertSame('', HtmlEntity::decode(''));
    }
}