xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmHtmlEntityTest.php (revision d20858669cbb910f566e0b7d1ba9da293d1b794e)
1<?php
2
3namespace dokuwiki\test\Parsing\ParserMode;
4
5use dokuwiki\Parsing\ParserMode\GfmHtmlEntity;
6use dokuwiki\Utf8\Unicode;
7
/**
 * Tests for the GfmHtmlEntity parser mode: numeric (decimal and hexadecimal)
 * and named character references embedded in paragraph text.
 *
 * Consecutive cdata calls are coalesced by Handler\Block::addCall during
 * finalize(), so a successful match shows up as a single cdata containing
 * the decoded character spliced into the surrounding text. Non-matching
 * inputs leave the literal `&...;` bytes in the cdata.
 *
 * Every expected cdata string in this class starts with "\n"; presumably the
 * parser prepends a newline to the input before lexing (confirm against the
 * parser implementation).
 */
class GfmHtmlEntityTest extends ParserTestBase
{
    /**
     * Parse $input with only the GfmHtmlEntity mode added by this test and
     * assert that the handler recorded exactly one paragraph holding a single
     * cdata call.
     *
     * @param string $input         wiki text handed to the parser
     * @param string $expectedCdata the complete text expected in the single cdata call
     */
    private function assertParsedCdata(string $input, string $expectedCdata): void
    {
        $this->P->addMode('gfm_html_entity', new GfmHtmlEntity());
        $this->P->parse($input);

        $expectedCalls = [
            ['document_start', []],
            ['p_open', []],
            ['cdata', [$expectedCdata]],
            ['p_close', []],
            ['document_end', []],
        ];

        $this->assertCalls($expectedCalls, $this->H->calls);
    }

    // ---------------------------------------------------------------------
    // Numeric character references
    // ---------------------------------------------------------------------

    /** A decimal reference in the ASCII range decodes to that character. */
    public function testDecimalAscii(): void
    {
        $this->assertParsedCdata('x &#35; y', "\nx # y");
    }

    /** A decimal reference outside ASCII decodes to the multibyte character (1234 = U+04D2). */
    public function testDecimalMultibyte(): void
    {
        $this->assertParsedCdata('a&#1234;b', "\na\u{04D2}b");
    }

    /** A hexadecimal reference with a lowercase `x` marker and lowercase digits decodes. */
    public function testHexLowercase(): void
    {
        $this->assertParsedCdata('a&#xcab;b', "\na\u{0CAB}b");
    }

    /** A hexadecimal reference with an uppercase `X` marker and uppercase digits decodes. */
    public function testHexUppercase(): void
    {
        $this->assertParsedCdata('a&#XD06;b', "\na\u{0D06}b");
    }

    /** A hexadecimal reference to the double quote yields a plain `"` in the cdata. */
    public function testHexQuoteCharacter(): void
    {
        $this->assertParsedCdata('a&#X22;b', "\na\"b");
    }

    /** Code point zero is replaced by U+FFFD (REPLACEMENT CHARACTER). */
    public function testZeroMapsToReplacement(): void
    {
        $this->assertParsedCdata('a&#0;b', "\na\u{FFFD}b");
    }

    /** A surrogate code point (U+D800) is replaced by U+FFFD. */
    public function testSurrogateMapsToReplacement(): void
    {
        $this->assertParsedCdata('a&#xD800;b', "\na\u{FFFD}b");
    }

    /** The highest code point, U+10FFFF, decodes to its UTF-8 encoding. */
    public function testMaxValidCodepoint(): void
    {
        $this->assertParsedCdata('a&#x10FFFF;b', "\na" . Unicode::toUtf8([0x10FFFF]) . 'b');
    }

    /** A decimal reference with nine digits is not treated as an entity. */
    public function testNonEntityTooManyDecimalDigitsStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#987654321;b', "\na&#987654321;b");
    }

    /** Hex letters directly after `&#` (no `x` marker) are not treated as an entity. */
    public function testNonEntityHexLetterAfterAmpStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#abcdef0;b', "\na&#abcdef0;b");
    }

    /** `&#;` without any digits stays literal. */
    public function testEmptyEntityStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#;b', "\na&#;b");
    }

    /** A numeric reference without the terminating semicolon stays literal. */
    public function testMissingSemicolonStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#35 b', "\na&#35 b");
    }

    /** Adjacent numeric references are each decoded and end up in one cdata. */
    public function testMultipleNumericEntitiesInSequence(): void
    {
        $this->assertParsedCdata('&#35;&#1234;&#xcab;', "\n#\u{04D2}\u{0CAB}");
    }

    /** `&#9;` decodes to a tab character. */
    public function testTabDecodes(): void
    {
        $this->assertParsedCdata('a&#9;b', "\na\tb");
    }

    /** `&#10;` decodes to a newline; two of them stay inside the same paragraph's cdata. */
    public function testNewlineDecodes(): void
    {
        $this->assertParsedCdata('foo&#10;&#10;bar', "\nfoo\n\nbar");
    }

    // ---------------------------------------------------------------------
    // Named character references
    // ---------------------------------------------------------------------

    /** `&amp;` decodes to a bare ampersand. */
    public function testNamedAmp(): void
    {
        // &amp; decodes to '&', renderer re-escapes on output
        $this->assertParsedCdata('a&amp;b', "\na&b");
    }

    /** `&copy;` decodes to U+00A9. */
    public function testNamedCopy(): void
    {
        $this->assertParsedCdata('a&copy;b', "\na\u{00A9}b");
    }

    /** A mixed-case entity name (`&AElig;`) decodes to U+00C6. */
    public function testNamedAElig(): void
    {
        $this->assertParsedCdata('a&AElig;b', "\na\u{00C6}b");
    }

    /** `&nbsp;` decodes to U+00A0. */
    public function testNamedNbsp(): void
    {
        $this->assertParsedCdata('a&nbsp;b', "\na\u{00A0}b");
    }

    /** A named entity that maps to two code points yields both of them. */
    public function testNamedMultiCodepoint(): void
    {
        // &ngE; -> U+2267 + U+0338 (combining solidus)
        $this->assertParsedCdata('a&ngE;b', "\na\u{2267}\u{0338}b");
    }

    /** An unknown entity name stays literal. */
    public function testNamedUnknownStaysLiteral(): void
    {
        $this->assertParsedCdata('a&MadeUpEntity;b', "\na&MadeUpEntity;b");
    }

    /** A known entity name without the terminating semicolon stays literal. */
    public function testNamedNoSemicolonStaysLiteral(): void
    {
        $this->assertParsedCdata('a&copy b', "\na&copy b");
    }

    /** Numeric and named references can be mixed back to back. */
    public function testMixedNumericAndNamed(): void
    {
        $this->assertParsedCdata('&#35;&copy;&#x22;', "\n#\u{00A9}\"");
    }
}
146