xref: /dokuwiki/inc/Parsing/ParserMode/GfmHtmlEntity.php (revision eb15e634e1400f6c4d78f5fb40179ca25f41574d)
1d2085866SAndreas Gohr<?php
2d2085866SAndreas Gohr
3d2085866SAndreas Gohrnamespace dokuwiki\Parsing\ParserMode;
4d2085866SAndreas Gohr
5d2085866SAndreas Gohruse dokuwiki\Parsing\Handler;
6*eb15e634SAndreas Gohruse dokuwiki\Parsing\Helpers\HtmlEntity;
7d2085866SAndreas Gohr
/**
 * GFM HTML entity references: numeric (`&#nnn;` and `&#xhhh;`) and
 * HTML5 named (`&copy;`, `&AElig;`, `&ngE;`, ...) decode to the
 * corresponding Unicode codepoint(s) and are emitted as cdata.
 *
 * Distinct from the typography Entity mode, which is renderer-side
 * configurable (entities.conf maps `(c)` to `©` etc.). HTML entity
 * references are not configurable - their meaning is fixed by the
 * HTML5 / Unicode specs - so decoding happens at parse time and the
 * renderer needs no changes.
 *
 * Decoding semantics live in {@see HtmlEntity}; this mode is a thin
 * wrapper that exposes them to the inline lexer. Whole-span PROTECTED
 * modes (GfmCode, GfmLink, ...) capture their body in one regex shot
 * and bypass this mode, so they call HtmlEntity::decode() directly on
 * the captured slice.
 *
 * Category SUBSTITION so the mode is reachable in every container
 * that allows substitutions (paragraphs, formatting, list items,
 * table cells, headers). Code spans and code blocks live in
 * CATEGORY_PROTECTED and reject SUBSTITION, so entities stay literal
 * there - matching CommonMark's rule that entities are not recognized
 * in code.
 *
 * Side benefit: by consuming the entire entity run before any
 * structural pattern sees it, this mode automatically enforces the
 * spec rule that numeric references cannot stand in for structural
 * markers. `&#42;foo&#42;` decodes to literal `*foo*` text and never
 * triggers emphasis; `&#42; foo` decodes to literal `* foo` and never
 * starts a list.
 */
class GfmHtmlEntity extends AbstractMode
{
    /**
     * Initialise the mode with an empty list of allowed nested modes.
     */
    public function __construct()
    {
        // Nothing is allowed to nest inside this mode.
        $this->allowedModes = [];
    }

    /**
     * Sort value of this mode; always 255.
     *
     * @inheritdoc
     */
    public function getSort()
    {
        return 255;
    }

    /**
     * Registers HtmlEntity::PATTERN with the lexer as a special pattern
     * of the given mode, passing 'gfm_html_entity' as the special mode.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        $entityPattern = HtmlEntity::PATTERN;

        $this->Lexer->addSpecialPattern($entityPattern, $mode, 'gfm_html_entity');
    }

    /**
     * Runs the matched text through HtmlEntity::decodeOne() and hands the
     * result to the handler as a single 'cdata' call at the given position.
     * The lexer state is not consulted. Always returns true.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $decoded = HtmlEntity::decodeOne($match);
        $cdataArgs = [$decoded];

        $handler->addCall('cdata', $cdataArgs, $pos);

        return true;
    }
}
65