xref: /dokuwiki/vendor/simplepie/simplepie/library/SimplePie/Decode/HTML/Entities.php (revision 8e88a29b81301f78509349ab1152bb09c229123e)
1<?php
2
3// SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
4// SPDX-License-Identifier: BSD-3-Clause
5
6declare(strict_types=1);
7
8
9/**
10 * Decode HTML Entities
11 *
12 * This implements HTML5 as of revision 967 (2007-06-28)
13 *
14 * @deprecated Use DOMDocument instead!
15 */
16class SimplePie_Decode_HTML_Entities
17{
18    /**
19     * Data to be parsed
20     *
21     * @access private
22     * @var string
23     */
24    public $data = '';
25
26    /**
27     * Currently consumed bytes
28     *
29     * @access private
30     * @var string
31     */
32    public $consumed = '';
33
34    /**
35     * Position of the current byte being parsed
36     *
37     * @access private
38     * @var int
39     */
40    public $position = 0;
41
42    /**
43     * Create an instance of the class with the input data
44     *
45     * @access public
46     * @param string $data Input data
47     */
48    public function __construct(string $data)
49    {
50        $this->data = $data;
51    }
52
53    /**
54     * Parse the input data
55     *
56     * @access public
57     * @return string Output data
58     */
59    public function parse()
60    {
61        while (($position = strpos($this->data, '&', $this->position)) !== false) {
62            $this->position = $position;
63            $this->consume();
64            $this->entity();
65            $this->consumed = '';
66        }
67        return $this->data;
68    }
69
70    /**
71     * Consume the next byte
72     *
73     * @access private
74     * @return string|false The next byte, or false, if there is no more data
75     */
76    public function consume()
77    {
78        if (isset($this->data[$this->position])) {
79            $this->consumed .= $this->data[$this->position];
80            return $this->data[$this->position++];
81        }
82
83        return false;
84    }
85
86    /**
87     * Consume a range of characters
88     *
89     * @access private
90     * @param string $chars Characters to consume
91     * @return string|false A series of characters that match the range, or false
92     */
93    public function consume_range(string $chars)
94    {
95        if ($len = strspn($this->data, $chars, $this->position)) {
96            $data = substr($this->data, $this->position, $len);
97            $this->consumed .= $data;
98            $this->position += $len;
99            return $data;
100        }
101
102        return false;
103    }
104
105    /**
106     * Unconsume one byte
107     *
108     * @access private
109     * @return void
110     */
111    public function unconsume()
112    {
113        $this->consumed = substr($this->consumed, 0, -1);
114        $this->position--;
115    }
116
117    /**
118     * Decode an entity
119     *
120     * @access private
121     * @return void
122     */
123    public function entity()
124    {
125        switch ($this->consume()) {
126            case "\x09":
127            case "\x0A":
128            case "\x0B":
129            case "\x0C":
130            case "\x20":
131            case "\x3C":
132            case "\x26":
133            case false:
134                break;
135
136            case "\x23":
137                switch ($this->consume()) {
138                    case "\x78":
139                    case "\x58":
140                        $range = '0123456789ABCDEFabcdef';
141                        $hex = true;
142                        break;
143
144                    default:
145                        $range = '0123456789';
146                        $hex = false;
147                        $this->unconsume();
148                        break;
149                }
150
151                if ($codepoint = $this->consume_range($range)) {
152                    static $windows_1252_specials = [0x0D => "\x0A", 0x80 => "\xE2\x82\xAC", 0x81 => "\xEF\xBF\xBD", 0x82 => "\xE2\x80\x9A", 0x83 => "\xC6\x92", 0x84 => "\xE2\x80\x9E", 0x85 => "\xE2\x80\xA6", 0x86 => "\xE2\x80\xA0", 0x87 => "\xE2\x80\xA1", 0x88 => "\xCB\x86", 0x89 => "\xE2\x80\xB0", 0x8A => "\xC5\xA0", 0x8B => "\xE2\x80\xB9", 0x8C => "\xC5\x92", 0x8D => "\xEF\xBF\xBD", 0x8E => "\xC5\xBD", 0x8F => "\xEF\xBF\xBD", 0x90 => "\xEF\xBF\xBD", 0x91 => "\xE2\x80\x98", 0x92 => "\xE2\x80\x99", 0x93 => "\xE2\x80\x9C", 0x94 => "\xE2\x80\x9D", 0x95 => "\xE2\x80\xA2", 0x96 => "\xE2\x80\x93", 0x97 => "\xE2\x80\x94", 0x98 => "\xCB\x9C", 0x99 => "\xE2\x84\xA2", 0x9A => "\xC5\xA1", 0x9B => "\xE2\x80\xBA", 0x9C => "\xC5\x93", 0x9D => "\xEF\xBF\xBD", 0x9E => "\xC5\xBE", 0x9F => "\xC5\xB8"];
153
154                    if ($hex) {
155                        // Cap to PHP_INT_MAX to ensure consistent behaviour if $codepoint is so large
156                        // it cannot fit into int – just casting float to int might return junk (e.g. a negative number).
157                        // If it is so large, `Misc::codepoint_to_utf8` will just return a replacement character.
158                        $codepoint = (int) min(hexdec($codepoint), \PHP_INT_MAX);
159                    } else {
160                        // Casting string to int caps at PHP_INT_MAX automatically.
161                        $codepoint = (int) $codepoint;
162                    }
163
164                    if (isset($windows_1252_specials[$codepoint])) {
165                        $replacement = $windows_1252_specials[$codepoint];
166                    } else {
167                        $replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
168                    }
169
170                    if (!in_array($this->consume(), [';', false], true)) {
171                        $this->unconsume();
172                    }
173
174                    $consumed_length = strlen($this->consumed);
175                    $this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
176                    $this->position += strlen($replacement) - $consumed_length;
177                }
178                break;
179
180            default:
181                static $entities = [
182                    'Aacute' => "\xC3\x81",
183                    'aacute' => "\xC3\xA1",
184                    'Aacute;' => "\xC3\x81",
185                    'aacute;' => "\xC3\xA1",
186                    'Acirc' => "\xC3\x82",
187                    'acirc' => "\xC3\xA2",
188                    'Acirc;' => "\xC3\x82",
189                    'acirc;' => "\xC3\xA2",
190                    'acute' => "\xC2\xB4",
191                    'acute;' => "\xC2\xB4",
192                    'AElig' => "\xC3\x86",
193                    'aelig' => "\xC3\xA6",
194                    'AElig;' => "\xC3\x86",
195                    'aelig;' => "\xC3\xA6",
196                    'Agrave' => "\xC3\x80",
197                    'agrave' => "\xC3\xA0",
198                    'Agrave;' => "\xC3\x80",
199                    'agrave;' => "\xC3\xA0",
200                    'alefsym;' => "\xE2\x84\xB5",
201                    'Alpha;' => "\xCE\x91",
202                    'alpha;' => "\xCE\xB1",
203                    'AMP' => "\x26",
204                    'amp' => "\x26",
205                    'AMP;' => "\x26",
206                    'amp;' => "\x26",
207                    'and;' => "\xE2\x88\xA7",
208                    'ang;' => "\xE2\x88\xA0",
209                    'apos;' => "\x27",
210                    'Aring' => "\xC3\x85",
211                    'aring' => "\xC3\xA5",
212                    'Aring;' => "\xC3\x85",
213                    'aring;' => "\xC3\xA5",
214                    'asymp;' => "\xE2\x89\x88",
215                    'Atilde' => "\xC3\x83",
216                    'atilde' => "\xC3\xA3",
217                    'Atilde;' => "\xC3\x83",
218                    'atilde;' => "\xC3\xA3",
219                    'Auml' => "\xC3\x84",
220                    'auml' => "\xC3\xA4",
221                    'Auml;' => "\xC3\x84",
222                    'auml;' => "\xC3\xA4",
223                    'bdquo;' => "\xE2\x80\x9E",
224                    'Beta;' => "\xCE\x92",
225                    'beta;' => "\xCE\xB2",
226                    'brvbar' => "\xC2\xA6",
227                    'brvbar;' => "\xC2\xA6",
228                    'bull;' => "\xE2\x80\xA2",
229                    'cap;' => "\xE2\x88\xA9",
230                    'Ccedil' => "\xC3\x87",
231                    'ccedil' => "\xC3\xA7",
232                    'Ccedil;' => "\xC3\x87",
233                    'ccedil;' => "\xC3\xA7",
234                    'cedil' => "\xC2\xB8",
235                    'cedil;' => "\xC2\xB8",
236                    'cent' => "\xC2\xA2",
237                    'cent;' => "\xC2\xA2",
238                    'Chi;' => "\xCE\xA7",
239                    'chi;' => "\xCF\x87",
240                    'circ;' => "\xCB\x86",
241                    'clubs;' => "\xE2\x99\xA3",
242                    'cong;' => "\xE2\x89\x85",
243                    'COPY' => "\xC2\xA9",
244                    'copy' => "\xC2\xA9",
245                    'COPY;' => "\xC2\xA9",
246                    'copy;' => "\xC2\xA9",
247                    'crarr;' => "\xE2\x86\xB5",
248                    'cup;' => "\xE2\x88\xAA",
249                    'curren' => "\xC2\xA4",
250                    'curren;' => "\xC2\xA4",
251                    'Dagger;' => "\xE2\x80\xA1",
252                    'dagger;' => "\xE2\x80\xA0",
253                    'dArr;' => "\xE2\x87\x93",
254                    'darr;' => "\xE2\x86\x93",
255                    'deg' => "\xC2\xB0",
256                    'deg;' => "\xC2\xB0",
257                    'Delta;' => "\xCE\x94",
258                    'delta;' => "\xCE\xB4",
259                    'diams;' => "\xE2\x99\xA6",
260                    'divide' => "\xC3\xB7",
261                    'divide;' => "\xC3\xB7",
262                    'Eacute' => "\xC3\x89",
263                    'eacute' => "\xC3\xA9",
264                    'Eacute;' => "\xC3\x89",
265                    'eacute;' => "\xC3\xA9",
266                    'Ecirc' => "\xC3\x8A",
267                    'ecirc' => "\xC3\xAA",
268                    'Ecirc;' => "\xC3\x8A",
269                    'ecirc;' => "\xC3\xAA",
270                    'Egrave' => "\xC3\x88",
271                    'egrave' => "\xC3\xA8",
272                    'Egrave;' => "\xC3\x88",
273                    'egrave;' => "\xC3\xA8",
274                    'empty;' => "\xE2\x88\x85",
275                    'emsp;' => "\xE2\x80\x83",
276                    'ensp;' => "\xE2\x80\x82",
277                    'Epsilon;' => "\xCE\x95",
278                    'epsilon;' => "\xCE\xB5",
279                    'equiv;' => "\xE2\x89\xA1",
280                    'Eta;' => "\xCE\x97",
281                    'eta;' => "\xCE\xB7",
282                    'ETH' => "\xC3\x90",
283                    'eth' => "\xC3\xB0",
284                    'ETH;' => "\xC3\x90",
285                    'eth;' => "\xC3\xB0",
286                    'Euml' => "\xC3\x8B",
287                    'euml' => "\xC3\xAB",
288                    'Euml;' => "\xC3\x8B",
289                    'euml;' => "\xC3\xAB",
290                    'euro;' => "\xE2\x82\xAC",
291                    'exist;' => "\xE2\x88\x83",
292                    'fnof;' => "\xC6\x92",
293                    'forall;' => "\xE2\x88\x80",
294                    'frac12' => "\xC2\xBD",
295                    'frac12;' => "\xC2\xBD",
296                    'frac14' => "\xC2\xBC",
297                    'frac14;' => "\xC2\xBC",
298                    'frac34' => "\xC2\xBE",
299                    'frac34;' => "\xC2\xBE",
300                    'frasl;' => "\xE2\x81\x84",
301                    'Gamma;' => "\xCE\x93",
302                    'gamma;' => "\xCE\xB3",
303                    'ge;' => "\xE2\x89\xA5",
304                    'GT' => "\x3E",
305                    'gt' => "\x3E",
306                    'GT;' => "\x3E",
307                    'gt;' => "\x3E",
308                    'hArr;' => "\xE2\x87\x94",
309                    'harr;' => "\xE2\x86\x94",
310                    'hearts;' => "\xE2\x99\xA5",
311                    'hellip;' => "\xE2\x80\xA6",
312                    'Iacute' => "\xC3\x8D",
313                    'iacute' => "\xC3\xAD",
314                    'Iacute;' => "\xC3\x8D",
315                    'iacute;' => "\xC3\xAD",
316                    'Icirc' => "\xC3\x8E",
317                    'icirc' => "\xC3\xAE",
318                    'Icirc;' => "\xC3\x8E",
319                    'icirc;' => "\xC3\xAE",
320                    'iexcl' => "\xC2\xA1",
321                    'iexcl;' => "\xC2\xA1",
322                    'Igrave' => "\xC3\x8C",
323                    'igrave' => "\xC3\xAC",
324                    'Igrave;' => "\xC3\x8C",
325                    'igrave;' => "\xC3\xAC",
326                    'image;' => "\xE2\x84\x91",
327                    'infin;' => "\xE2\x88\x9E",
328                    'int;' => "\xE2\x88\xAB",
329                    'Iota;' => "\xCE\x99",
330                    'iota;' => "\xCE\xB9",
331                    'iquest' => "\xC2\xBF",
332                    'iquest;' => "\xC2\xBF",
333                    'isin;' => "\xE2\x88\x88",
334                    'Iuml' => "\xC3\x8F",
335                    'iuml' => "\xC3\xAF",
336                    'Iuml;' => "\xC3\x8F",
337                    'iuml;' => "\xC3\xAF",
338                    'Kappa;' => "\xCE\x9A",
339                    'kappa;' => "\xCE\xBA",
340                    'Lambda;' => "\xCE\x9B",
341                    'lambda;' => "\xCE\xBB",
342                    'lang;' => "\xE3\x80\x88",
343                    'laquo' => "\xC2\xAB",
344                    'laquo;' => "\xC2\xAB",
345                    'lArr;' => "\xE2\x87\x90",
346                    'larr;' => "\xE2\x86\x90",
347                    'lceil;' => "\xE2\x8C\x88",
348                    'ldquo;' => "\xE2\x80\x9C",
349                    'le;' => "\xE2\x89\xA4",
350                    'lfloor;' => "\xE2\x8C\x8A",
351                    'lowast;' => "\xE2\x88\x97",
352                    'loz;' => "\xE2\x97\x8A",
353                    'lrm;' => "\xE2\x80\x8E",
354                    'lsaquo;' => "\xE2\x80\xB9",
355                    'lsquo;' => "\xE2\x80\x98",
356                    'LT' => "\x3C",
357                    'lt' => "\x3C",
358                    'LT;' => "\x3C",
359                    'lt;' => "\x3C",
360                    'macr' => "\xC2\xAF",
361                    'macr;' => "\xC2\xAF",
362                    'mdash;' => "\xE2\x80\x94",
363                    'micro' => "\xC2\xB5",
364                    'micro;' => "\xC2\xB5",
365                    'middot' => "\xC2\xB7",
366                    'middot;' => "\xC2\xB7",
367                    'minus;' => "\xE2\x88\x92",
368                    'Mu;' => "\xCE\x9C",
369                    'mu;' => "\xCE\xBC",
370                    'nabla;' => "\xE2\x88\x87",
371                    'nbsp' => "\xC2\xA0",
372                    'nbsp;' => "\xC2\xA0",
373                    'ndash;' => "\xE2\x80\x93",
374                    'ne;' => "\xE2\x89\xA0",
375                    'ni;' => "\xE2\x88\x8B",
376                    'not' => "\xC2\xAC",
377                    'not;' => "\xC2\xAC",
378                    'notin;' => "\xE2\x88\x89",
379                    'nsub;' => "\xE2\x8A\x84",
380                    'Ntilde' => "\xC3\x91",
381                    'ntilde' => "\xC3\xB1",
382                    'Ntilde;' => "\xC3\x91",
383                    'ntilde;' => "\xC3\xB1",
384                    'Nu;' => "\xCE\x9D",
385                    'nu;' => "\xCE\xBD",
386                    'Oacute' => "\xC3\x93",
387                    'oacute' => "\xC3\xB3",
388                    'Oacute;' => "\xC3\x93",
389                    'oacute;' => "\xC3\xB3",
390                    'Ocirc' => "\xC3\x94",
391                    'ocirc' => "\xC3\xB4",
392                    'Ocirc;' => "\xC3\x94",
393                    'ocirc;' => "\xC3\xB4",
394                    'OElig;' => "\xC5\x92",
395                    'oelig;' => "\xC5\x93",
396                    'Ograve' => "\xC3\x92",
397                    'ograve' => "\xC3\xB2",
398                    'Ograve;' => "\xC3\x92",
399                    'ograve;' => "\xC3\xB2",
400                    'oline;' => "\xE2\x80\xBE",
401                    'Omega;' => "\xCE\xA9",
402                    'omega;' => "\xCF\x89",
403                    'Omicron;' => "\xCE\x9F",
404                    'omicron;' => "\xCE\xBF",
405                    'oplus;' => "\xE2\x8A\x95",
406                    'or;' => "\xE2\x88\xA8",
407                    'ordf' => "\xC2\xAA",
408                    'ordf;' => "\xC2\xAA",
409                    'ordm' => "\xC2\xBA",
410                    'ordm;' => "\xC2\xBA",
411                    'Oslash' => "\xC3\x98",
412                    'oslash' => "\xC3\xB8",
413                    'Oslash;' => "\xC3\x98",
414                    'oslash;' => "\xC3\xB8",
415                    'Otilde' => "\xC3\x95",
416                    'otilde' => "\xC3\xB5",
417                    'Otilde;' => "\xC3\x95",
418                    'otilde;' => "\xC3\xB5",
419                    'otimes;' => "\xE2\x8A\x97",
420                    'Ouml' => "\xC3\x96",
421                    'ouml' => "\xC3\xB6",
422                    'Ouml;' => "\xC3\x96",
423                    'ouml;' => "\xC3\xB6",
424                    'para' => "\xC2\xB6",
425                    'para;' => "\xC2\xB6",
426                    'part;' => "\xE2\x88\x82",
427                    'permil;' => "\xE2\x80\xB0",
428                    'perp;' => "\xE2\x8A\xA5",
429                    'Phi;' => "\xCE\xA6",
430                    'phi;' => "\xCF\x86",
431                    'Pi;' => "\xCE\xA0",
432                    'pi;' => "\xCF\x80",
433                    'piv;' => "\xCF\x96",
434                    'plusmn' => "\xC2\xB1",
435                    'plusmn;' => "\xC2\xB1",
436                    'pound' => "\xC2\xA3",
437                    'pound;' => "\xC2\xA3",
438                    'Prime;' => "\xE2\x80\xB3",
439                    'prime;' => "\xE2\x80\xB2",
440                    'prod;' => "\xE2\x88\x8F",
441                    'prop;' => "\xE2\x88\x9D",
442                    'Psi;' => "\xCE\xA8",
443                    'psi;' => "\xCF\x88",
444                    'QUOT' => "\x22",
445                    'quot' => "\x22",
446                    'QUOT;' => "\x22",
447                    'quot;' => "\x22",
448                    'radic;' => "\xE2\x88\x9A",
449                    'rang;' => "\xE3\x80\x89",
450                    'raquo' => "\xC2\xBB",
451                    'raquo;' => "\xC2\xBB",
452                    'rArr;' => "\xE2\x87\x92",
453                    'rarr;' => "\xE2\x86\x92",
454                    'rceil;' => "\xE2\x8C\x89",
455                    'rdquo;' => "\xE2\x80\x9D",
456                    'real;' => "\xE2\x84\x9C",
457                    'REG' => "\xC2\xAE",
458                    'reg' => "\xC2\xAE",
459                    'REG;' => "\xC2\xAE",
460                    'reg;' => "\xC2\xAE",
461                    'rfloor;' => "\xE2\x8C\x8B",
462                    'Rho;' => "\xCE\xA1",
463                    'rho;' => "\xCF\x81",
464                    'rlm;' => "\xE2\x80\x8F",
465                    'rsaquo;' => "\xE2\x80\xBA",
466                    'rsquo;' => "\xE2\x80\x99",
467                    'sbquo;' => "\xE2\x80\x9A",
468                    'Scaron;' => "\xC5\xA0",
469                    'scaron;' => "\xC5\xA1",
470                    'sdot;' => "\xE2\x8B\x85",
471                    'sect' => "\xC2\xA7",
472                    'sect;' => "\xC2\xA7",
473                    'shy' => "\xC2\xAD",
474                    'shy;' => "\xC2\xAD",
475                    'Sigma;' => "\xCE\xA3",
476                    'sigma;' => "\xCF\x83",
477                    'sigmaf;' => "\xCF\x82",
478                    'sim;' => "\xE2\x88\xBC",
479                    'spades;' => "\xE2\x99\xA0",
480                    'sub;' => "\xE2\x8A\x82",
481                    'sube;' => "\xE2\x8A\x86",
482                    'sum;' => "\xE2\x88\x91",
483                    'sup;' => "\xE2\x8A\x83",
484                    'sup1' => "\xC2\xB9",
485                    'sup1;' => "\xC2\xB9",
486                    'sup2' => "\xC2\xB2",
487                    'sup2;' => "\xC2\xB2",
488                    'sup3' => "\xC2\xB3",
489                    'sup3;' => "\xC2\xB3",
490                    'supe;' => "\xE2\x8A\x87",
491                    'szlig' => "\xC3\x9F",
492                    'szlig;' => "\xC3\x9F",
493                    'Tau;' => "\xCE\xA4",
494                    'tau;' => "\xCF\x84",
495                    'there4;' => "\xE2\x88\xB4",
496                    'Theta;' => "\xCE\x98",
497                    'theta;' => "\xCE\xB8",
498                    'thetasym;' => "\xCF\x91",
499                    'thinsp;' => "\xE2\x80\x89",
500                    'THORN' => "\xC3\x9E",
501                    'thorn' => "\xC3\xBE",
502                    'THORN;' => "\xC3\x9E",
503                    'thorn;' => "\xC3\xBE",
504                    'tilde;' => "\xCB\x9C",
505                    'times' => "\xC3\x97",
506                    'times;' => "\xC3\x97",
507                    'TRADE;' => "\xE2\x84\xA2",
508                    'trade;' => "\xE2\x84\xA2",
509                    'Uacute' => "\xC3\x9A",
510                    'uacute' => "\xC3\xBA",
511                    'Uacute;' => "\xC3\x9A",
512                    'uacute;' => "\xC3\xBA",
513                    'uArr;' => "\xE2\x87\x91",
514                    'uarr;' => "\xE2\x86\x91",
515                    'Ucirc' => "\xC3\x9B",
516                    'ucirc' => "\xC3\xBB",
517                    'Ucirc;' => "\xC3\x9B",
518                    'ucirc;' => "\xC3\xBB",
519                    'Ugrave' => "\xC3\x99",
520                    'ugrave' => "\xC3\xB9",
521                    'Ugrave;' => "\xC3\x99",
522                    'ugrave;' => "\xC3\xB9",
523                    'uml' => "\xC2\xA8",
524                    'uml;' => "\xC2\xA8",
525                    'upsih;' => "\xCF\x92",
526                    'Upsilon;' => "\xCE\xA5",
527                    'upsilon;' => "\xCF\x85",
528                    'Uuml' => "\xC3\x9C",
529                    'uuml' => "\xC3\xBC",
530                    'Uuml;' => "\xC3\x9C",
531                    'uuml;' => "\xC3\xBC",
532                    'weierp;' => "\xE2\x84\x98",
533                    'Xi;' => "\xCE\x9E",
534                    'xi;' => "\xCE\xBE",
535                    'Yacute' => "\xC3\x9D",
536                    'yacute' => "\xC3\xBD",
537                    'Yacute;' => "\xC3\x9D",
538                    'yacute;' => "\xC3\xBD",
539                    'yen' => "\xC2\xA5",
540                    'yen;' => "\xC2\xA5",
541                    'yuml' => "\xC3\xBF",
542                    'Yuml;' => "\xC5\xB8",
543                    'yuml;' => "\xC3\xBF",
544                    'Zeta;' => "\xCE\x96",
545                    'zeta;' => "\xCE\xB6",
546                    'zwj;' => "\xE2\x80\x8D",
547                    'zwnj;' => "\xE2\x80\x8C"
548                ];
549
550                for ($i = 0, $match = null; $i < 9 && $this->consume() !== false; $i++) {
551                    // Cast for PHPStan on PHP < 8.0: We consumed as per the loop condition,
552                    // so `$this->consumed` is non-empty and the substr offset is valid.
553                    $consumed = (string) substr($this->consumed, 1);
554                    if (isset($entities[$consumed])) {
555                        $match = $consumed;
556                    }
557                }
558
559                if ($match !== null) {
560                    $this->data = substr_replace($this->data, $entities[$match], $this->position - strlen($consumed) - 1, strlen($match) + 1);
561                    $this->position += strlen($entities[$match]) - strlen($consumed) - 1;
562                }
563                break;
564        }
565    }
566}
567