xref: /dokuwiki/inc/Parsing/ParserMode/GfmHtmlEntity.php (revision eb15e634e1400f6c4d78f5fb40179ca25f41574d)
1<?php
2
3namespace dokuwiki\Parsing\ParserMode;
4
5use dokuwiki\Parsing\Handler;
6use dokuwiki\Parsing\Helpers\HtmlEntity;
7
8/**
9 * GFM HTML entity references: numeric (`&#nnn;` and `&#xhhh;`) and
10 * HTML5 named (`&copy;`, `&AElig;`, `&ngE;`, ...) decode to the
11 * corresponding Unicode codepoint(s) and ride out as cdata.
12 *
13 * Distinct from the typography Entity mode, which is renderer-side
14 * configurable (entities.conf maps `(c)` to `©` etc.). HTML entity
15 * references are not configurable - their meaning is fixed by the
16 * HTML5 / Unicode specs - so decoding happens at parse time and the
17 * renderer needs no changes.
18 *
19 * Decoding semantics live in {@see HtmlEntity}; this mode is a thin
20 * wrapper that exposes them to the inline lexer. Whole-span PROTECTED
21 * modes (GfmCode, GfmLink, ...) capture their body in one regex shot
22 * and bypass this mode, so they call HtmlEntity::decode() directly on
23 * the captured slice.
24 *
25 * Category SUBSTITION so the mode is reachable in every container
26 * that allows substitutions (paragraphs, formatting, list items,
27 * table cells, headers). Code spans and code blocks live in
28 * CATEGORY_PROTECTED and reject SUBSTITION, so entities stay literal
29 * there - matching CommonMark's rule that entities are not recognized
30 * in code.
31 *
32 * Side benefit: by consuming the entire entity run before any
33 * structural pattern sees it, this mode automatically enforces the
34 * spec rule that numeric references cannot stand in for structural
35 * markers. `&#42;foo&#42;` decodes to literal `*foo*` text and never
36 * triggers emphasis; `&#42; foo` decodes to literal `* foo` and never
37 * starts a list.
38 */
class GfmHtmlEntity extends AbstractMode
{
    /**
     * Set up the mode with an empty list of allowed nested modes.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /**
     * {@inheritdoc}
     *
     * @return int the fixed sort value for this mode (255)
     */
    public function getSort()
    {
        return 255;
    }

    /**
     * {@inheritdoc}
     *
     * Registers the shared entity pattern from {@see HtmlEntity::PATTERN}
     * as a special pattern on the lexer for the given mode, under the
     * name `gfm_html_entity`.
     *
     * @param string $mode name of the mode this pattern is attached to
     */
    public function connectTo($mode)
    {
        $this->Lexer->addSpecialPattern(
            HtmlEntity::PATTERN,
            $mode,
            'gfm_html_entity'
        );
    }

    /**
     * {@inheritdoc}
     *
     * Passes the matched text through {@see HtmlEntity::decodeOne()} and
     * records the result as a `cdata` call at the match position.
     *
     * @param string  $match   the text matched by the lexer
     * @param int     $state   lexer state (not used by this mode)
     * @param int     $pos     position passed on to the handler call
     * @param Handler $handler handler that collects the call
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $decoded = HtmlEntity::decodeOne($match);
        $handler->addCall('cdata', [$decoded], $pos);

        return true;
    }
}
65