xref: /dokuwiki/_test/tests/Parsing/Helpers/HtmlEntityTest.php (revision 2e43b79909f3bc04928779d886f68c1242b5d436)
1<?php
2
3namespace dokuwiki\test\Parsing\Helpers;
4
5use dokuwiki\Parsing\Helpers\HtmlEntity;
6use dokuwiki\Utf8\Unicode;
7
8/**
9 * Tests for the HTML entity-decoding post-hoc helper.
10 *
11 * The lexer-mode coverage is in {@see \dokuwiki\test\Parsing\ParserMode\GfmHtmlEntityTest};
12 * this class exercises the helper that GfmLink and GfmCode call on text
13 * the lexer never reached.
14 */
class HtmlEntityTest extends \DokuWikiTest
{
    /**
     * Run the helper on $input and require an identical match with $expected.
     *
     * @param string $expected the exact string the helper has to return
     * @param string $input    the raw text handed to HtmlEntity::decode()
     */
    private function assertDecodesTo(string $expected, string $input): void
    {
        $actual = HtmlEntity::decode($input);
        $this->assertSame($expected, $actual);
    }

    /**
     * Require that the helper hands $text back without any modification.
     *
     * @param string $text input that must come out of the decoder unchanged
     */
    private function assertLeftUntouched(string $text): void
    {
        $this->assertDecodesTo($text, $text);
    }

    /**
     * A decimal reference to an ASCII code point is replaced by that character.
     */
    public function testDecimalDecodes()
    {
        $this->assertDecodesTo('a#b', 'a&#35;b');
    }

    /**
     * A decimal reference above the ASCII range yields U+04D2 (1234 decimal).
     */
    public function testDecimalMultibyte()
    {
        $this->assertDecodesTo("a\u{04D2}b", 'a&#1234;b');
    }

    /**
     * Hex reference written with a lowercase "x" prefix and lowercase digits.
     */
    public function testHexLowercase()
    {
        $this->assertDecodesTo("a\u{0CAB}b", 'a&#xcab;b');
    }

    /**
     * Hex reference written with an uppercase "X" prefix and uppercase digits.
     */
    public function testHexUppercase()
    {
        $this->assertDecodesTo("a\u{0D06}b", 'a&#XD06;b');
    }

    /**
     * The reference to code point 0 is expected to come back as U+FFFD.
     */
    public function testZeroMapsToReplacement()
    {
        $this->assertDecodesTo("a\u{FFFD}b", 'a&#0;b');
    }

    /**
     * A reference to the surrogate U+D800 is expected to come back as U+FFFD.
     */
    public function testSurrogateMapsToReplacement()
    {
        $this->assertDecodesTo("a\u{FFFD}b", 'a&#xD800;b');
    }

    /**
     * 1114112 (0x110000) is one past the last code point; U+FFFD is expected.
     */
    public function testOverflowMapsToReplacement()
    {
        $this->assertDecodesTo("a\u{FFFD}b", 'a&#1114112;b');
    }

    /**
     * The highest code point, U+10FFFF, must still decode to its UTF-8 form.
     */
    public function testMaxValidCodepoint()
    {
        $highest = Unicode::toUtf8([0x10FFFF]);
        $this->assertDecodesTo('a' . $highest . 'b', 'a&#x10FFFF;b');
    }

    /**
     * The named entity "amp" becomes a bare ampersand.
     */
    public function testNamedAmp()
    {
        $this->assertDecodesTo('a&b', 'a&amp;b');
    }

    /**
     * The named entity "copy" becomes U+00A9.
     */
    public function testNamedCopy()
    {
        $this->assertDecodesTo("a\u{00A9}b", 'a&copy;b');
    }

    /**
     * A mixed-case entity name ("AElig") becomes U+00C6.
     */
    public function testNamedAElig()
    {
        $this->assertDecodesTo("a\u{00C6}b", 'a&AElig;b');
    }

    /**
     * The named entity "nbsp" becomes U+00A0.
     */
    public function testNamedNbsp()
    {
        $this->assertDecodesTo("a\u{00A0}b", 'a&nbsp;b');
    }

    /**
     * A single entity name may expand to more than one code point:
     * "ngE" is expected as U+2267 followed by U+0338 (combining solidus).
     */
    public function testNamedMultiCodepoint()
    {
        $this->assertDecodesTo("a\u{2267}\u{0338}b", 'a&ngE;b');
    }

    /**
     * An entity-shaped token with an unrecognised name is kept verbatim.
     */
    public function testUnknownNameStaysLiteral()
    {
        $this->assertLeftUntouched('a&MadeUpEntity;b');
    }

    /**
     * A known name without the terminating semicolon is kept verbatim.
     */
    public function testNoSemicolonStaysLiteral()
    {
        $this->assertLeftUntouched('a&copy b');
    }

    /**
     * A nine-digit decimal reference is kept verbatim.
     */
    public function testTooManyDecimalDigitsStaysLiteral()
    {
        $this->assertLeftUntouched('a&#987654321;b');
    }

    /**
     * Hex letters following "&#" without an "x" marker are kept verbatim.
     */
    public function testHexLetterAfterAmpStaysLiteral()
    {
        $this->assertLeftUntouched('a&#abcdef0;b');
    }

    /**
     * A numeric reference without any digits is kept verbatim.
     */
    public function testEmptyEntityStaysLiteral()
    {
        $this->assertLeftUntouched('a&#;b');
    }

    /**
     * Back-to-back entities of different kinds are each decoded.
     */
    public function testMultipleEntitiesInSequence()
    {
        $this->assertDecodesTo("#\u{04D2}\u{00A9}", '&#35;&#1234;&copy;');
    }

    /**
     * Text that contains no ampersand at all comes back unchanged.
     */
    public function testNonEntityBytesPassThrough()
    {
        $this->assertLeftUntouched('plain text without entities');
    }

    /**
     * The empty string decodes to the empty string.
     */
    public function testEmptyInput()
    {
        $this->assertLeftUntouched('');
    }
}
129