xref: /dokuwiki/_test/tests/Parsing/Helpers/HtmlEntityTest.php (revision eb15e634e1400f6c4d78f5fb40179ca25f41574d)
1*eb15e634SAndreas Gohr<?php
2*eb15e634SAndreas Gohr
3*eb15e634SAndreas Gohrnamespace dokuwiki\test\Parsing\Helpers;
4*eb15e634SAndreas Gohr
5*eb15e634SAndreas Gohruse dokuwiki\Parsing\Helpers\HtmlEntity;
6*eb15e634SAndreas Gohruse dokuwiki\Utf8\Unicode;
7*eb15e634SAndreas Gohr
/**
 * Tests for the post-hoc HTML entity-decoding helper.
 *
 * The lexer-mode coverage is in {@see \dokuwiki\test\Parsing\ParserMode\GfmHtmlEntityTest};
 * this class exercises the helper that GfmLink and GfmCode call on text
 * the lexer never reached.
 */
class HtmlEntityTest extends \DokuWikiTest
{
    /**
     * A decimal numeric reference to an ASCII character is decoded.
     */
    public function testDecimalDecodes()
    {
        $input = 'a&#35;b';
        $expected = 'a#b';

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * A decimal numeric reference to a non-ASCII code point is decoded.
     */
    public function testDecimalMultibyte()
    {
        $input = 'a&#1234;b';
        $expected = "a\u{04D2}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * A hexadecimal reference written with a lowercase "x" and lowercase digits is decoded.
     */
    public function testHexLowercase()
    {
        $input = 'a&#xcab;b';
        $expected = "a\u{0CAB}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * A hexadecimal reference written with an uppercase "X" is decoded.
     */
    public function testHexUppercase()
    {
        $input = 'a&#XD06;b';
        $expected = "a\u{0D06}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * A reference to code point zero yields U+FFFD.
     */
    public function testZeroMapsToReplacement()
    {
        $input = 'a&#0;b';
        $expected = "a\u{FFFD}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * A reference to the surrogate code point U+D800 yields U+FFFD.
     */
    public function testSurrogateMapsToReplacement()
    {
        $input = 'a&#xD800;b';
        $expected = "a\u{FFFD}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * A reference one past the highest code point (1114112 = 0x110000) yields U+FFFD.
     */
    public function testOverflowMapsToReplacement()
    {
        $input = 'a&#1114112;b';
        $expected = "a\u{FFFD}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * The highest code point, U+10FFFF, is decoded rather than replaced.
     */
    public function testMaxValidCodepoint()
    {
        $input = 'a&#x10FFFF;b';
        // Build the expectation through the Unicode helper, as the original test does.
        $expected = 'a' . Unicode::toUtf8([0x10FFFF]) . 'b';

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * The named reference "amp" decodes to a literal ampersand.
     */
    public function testNamedAmp()
    {
        $input = 'a&amp;b';
        $expected = 'a&b';

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * The named reference "copy" decodes to U+00A9.
     */
    public function testNamedCopy()
    {
        $input = 'a&copy;b';
        $expected = "a\u{00A9}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * The mixed-case named reference "AElig" decodes to U+00C6.
     */
    public function testNamedAElig()
    {
        $input = 'a&AElig;b';
        $expected = "a\u{00C6}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * The named reference "nbsp" decodes to U+00A0.
     */
    public function testNamedNbsp()
    {
        $input = 'a&nbsp;b';
        $expected = "a\u{00A0}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * A named reference that expands to two code points is decoded in full.
     */
    public function testNamedMultiCodepoint()
    {
        // &ngE; expands to U+2267 followed by U+0338 (combining solidus).
        $input = 'a&ngE;b';
        $expected = "a\u{2267}\u{0338}b";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * An unrecognised entity name is returned unchanged.
     */
    public function testUnknownNameStaysLiteral()
    {
        $input = 'a&MadeUpEntity;b';

        self::assertSame('a&MadeUpEntity;b', HtmlEntity::decode($input));
    }

    /**
     * A known entity name without its terminating semicolon is returned unchanged.
     */
    public function testNoSemicolonStaysLiteral()
    {
        $input = 'a&copy b';

        self::assertSame('a&copy b', HtmlEntity::decode($input));
    }

    /**
     * A decimal reference with nine digits is returned unchanged.
     */
    public function testTooManyDecimalDigitsStaysLiteral()
    {
        $input = 'a&#987654321;b';

        self::assertSame('a&#987654321;b', HtmlEntity::decode($input));
    }

    /**
     * Hex digits placed directly after "&#", with no x/X marker, are returned unchanged.
     */
    public function testHexLetterAfterAmpStaysLiteral()
    {
        $input = 'a&#abcdef0;b';

        self::assertSame('a&#abcdef0;b', HtmlEntity::decode($input));
    }

    /**
     * A numeric reference with no digits at all is returned unchanged.
     */
    public function testEmptyEntityStaysLiteral()
    {
        $input = 'a&#;b';

        self::assertSame('a&#;b', HtmlEntity::decode($input));
    }

    /**
     * Adjacent references of different kinds are each decoded.
     */
    public function testMultipleEntitiesInSequence()
    {
        $input = '&#35;&#1234;&copy;';
        $expected = "#\u{04D2}\u{00A9}";

        self::assertSame($expected, HtmlEntity::decode($input));
    }

    /**
     * Text containing no references is returned unchanged.
     */
    public function testNonEntityBytesPassThrough()
    {
        $input = 'plain text without entities';

        self::assertSame('plain text without entities', HtmlEntity::decode($input));
    }

    /**
     * The empty string decodes to the empty string.
     */
    public function testEmptyInput()
    {
        $input = '';

        self::assertSame('', HtmlEntity::decode($input));
    }
}
129