xref: /dokuwiki/_test/tests/Parsing/ParserMode/GfmHtmlEntityTest.php (revision d20858669cbb910f566e0b7d1ba9da293d1b794e)
1*d2085866SAndreas Gohr<?php
2*d2085866SAndreas Gohr
3*d2085866SAndreas Gohrnamespace dokuwiki\test\Parsing\ParserMode;
4*d2085866SAndreas Gohr
5*d2085866SAndreas Gohruse dokuwiki\Parsing\ParserMode\GfmHtmlEntity;
6*d2085866SAndreas Gohruse dokuwiki\Utf8\Unicode;
7*d2085866SAndreas Gohr
/**
 * Tests for the GfmHtmlEntity parser mode (numeric and named character references).
 *
 * Consecutive cdata calls are coalesced by Handler\Block::addCall during
 * finalize(), so a successful match shows up as a single cdata containing
 * the decoded character spliced into the surrounding text. Non-matching
 * inputs leave the literal `&...;` bytes in the cdata.
 *
 * Every expected cdata string in this class starts with "\n"; that leading
 * newline is part of what the handler records for a paragraph's first cdata
 * (presumably added by the parser before lexing - confirm in ParserTestBase/Parser).
 */
class GfmHtmlEntityTest extends ParserTestBase
{
    /**
     * Parse $input with only the GfmHtmlEntity mode registered and assert that
     * the handler recorded exactly one paragraph holding a single cdata call.
     *
     * $this->P, $this->H and assertCalls() are not defined in this class; they
     * are presumably supplied by ParserTestBase.
     *
     * @param string $input         wiki text fed to the parser
     * @param string $expectedCdata the full text expected in the single cdata call
     */
    private function assertParsedCdata(string $input, string $expectedCdata): void
    {
        $this->P->addMode('gfm_html_entity', new GfmHtmlEntity());
        $this->P->parse($input);
        $this->assertCalls([
            ['document_start', []],
            ['p_open', []],
            ['cdata', [$expectedCdata]],
            ['p_close', []],
            ['document_end', []],
        ], $this->H->calls);
    }

    // --- numeric references: decimal ---

    public function testDecimalAscii(): void
    {
        $this->assertParsedCdata('x &#35; y', "\nx # y");
    }

    public function testDecimalMultibyte(): void
    {
        // decimal 1234 is 0x04D2
        $this->assertParsedCdata('a&#1234;b', "\na\u{04D2}b");
    }

    // --- numeric references: hexadecimal, both `x` and `X` markers ---

    public function testHexLowercase(): void
    {
        $this->assertParsedCdata('a&#xcab;b', "\na\u{0CAB}b");
    }

    public function testHexUppercase(): void
    {
        $this->assertParsedCdata('a&#XD06;b', "\na\u{0D06}b");
    }

    public function testHexQuoteCharacter(): void
    {
        // 0x22 is the double quote character
        $this->assertParsedCdata('a&#X22;b', "\na\"b");
    }

    // --- numeric references: invalid code points become U+FFFD ---

    public function testZeroMapsToReplacement(): void
    {
        $this->assertParsedCdata('a&#0;b', "\na\u{FFFD}b");
    }

    public function testSurrogateMapsToReplacement(): void
    {
        $this->assertParsedCdata('a&#xD800;b', "\na\u{FFFD}b");
    }

    public function testMaxValidCodepoint(): void
    {
        // the expected character is built with Unicode::toUtf8() rather than a "\u{...}" escape
        $this->assertParsedCdata('a&#x10FFFF;b', "\na" . Unicode::toUtf8([0x10FFFF]) . 'b');
    }

    // --- malformed numeric references are left untouched ---

    public function testNonEntityTooManyDecimalDigitsStaysLiteral(): void
    {
        // nine decimal digits
        $this->assertParsedCdata('a&#987654321;b', "\na&#987654321;b");
    }

    public function testNonEntityHexLetterAfterAmpStaysLiteral(): void
    {
        // hex digits without the x/X marker after `&#`
        $this->assertParsedCdata('a&#abcdef0;b', "\na&#abcdef0;b");
    }

    public function testEmptyEntityStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#;b', "\na&#;b");
    }

    public function testMissingSemicolonStaysLiteral(): void
    {
        $this->assertParsedCdata('a&#35 b', "\na&#35 b");
    }

    public function testMultipleNumericEntitiesInSequence(): void
    {
        $this->assertParsedCdata('&#35;&#1234;&#xcab;', "\n#\u{04D2}\u{0CAB}");
    }

    // --- numeric references to whitespace characters ---

    public function testTabDecodes(): void
    {
        $this->assertParsedCdata('a&#9;b', "\na\tb");
    }

    public function testNewlineDecodes(): void
    {
        // two decoded newlines stay inside the same cdata; no extra paragraph calls are expected
        $this->assertParsedCdata('foo&#10;&#10;bar', "\nfoo\n\nbar");
    }

    // --- named references ---

    public function testNamedAmp(): void
    {
        // &amp; decodes to '&', renderer re-escapes on output
        $this->assertParsedCdata('a&amp;b', "\na&b");
    }

    public function testNamedCopy(): void
    {
        $this->assertParsedCdata('a&copy;b', "\na\u{00A9}b");
    }

    public function testNamedAElig(): void
    {
        $this->assertParsedCdata('a&AElig;b', "\na\u{00C6}b");
    }

    public function testNamedNbsp(): void
    {
        $this->assertParsedCdata('a&nbsp;b', "\na\u{00A0}b");
    }

    public function testNamedMultiCodepoint(): void
    {
        // &ngE; -> U+2267 + U+0338 (combining solidus)
        $this->assertParsedCdata('a&ngE;b', "\na\u{2267}\u{0338}b");
    }

    public function testNamedUnknownStaysLiteral(): void
    {
        $this->assertParsedCdata('a&MadeUpEntity;b', "\na&MadeUpEntity;b");
    }

    public function testNamedNoSemicolonStaysLiteral(): void
    {
        $this->assertParsedCdata('a&copy b', "\na&copy b");
    }

    public function testMixedNumericAndNamed(): void
    {
        $this->assertParsedCdata('&#35;&copy;&#x22;', "\n#\u{00A9}\"");
    }
}
146