<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\HtmlEntity;

/**
 * Inline parser mode for GFM HTML entity references.
 *
 * Numeric references (`&#nnn;` and `&#xhhh;`) and HTML5 named references
 * (`&copy;`, `&AElig;`, `&ngE;`, ...) are decoded to the corresponding
 * Unicode codepoint(s) and emitted as cdata.
 *
 * This is distinct from the typography Entity mode, which is
 * renderer-side configurable (entities.conf maps `(c)` to `©` etc.).
 * HTML entity references are not configurable - their meaning is fixed
 * by the HTML5 / Unicode specs - so decoding happens at parse time and
 * the renderer needs no changes.
 *
 * The decoding semantics live in {@see HtmlEntity}; this mode is only a
 * thin wrapper exposing them to the inline lexer. Whole-span PROTECTED
 * modes (GfmCode, GfmLink, ...) capture their body in one regex shot and
 * bypass this mode, so they call HtmlEntity::decode() directly on the
 * captured slice.
 *
 * The mode belongs to category SUBSTITUTION so that it is reachable in
 * every container that allows substitutions (paragraphs, formatting,
 * list items, table cells, headers). Code spans and code blocks live in
 * CATEGORY_PROTECTED and reject SUBSTITUTION, so entities stay literal
 * there - matching CommonMark's rule that entities are not recognized
 * in code.
 *
 * Side benefit: because the entire entity run is consumed before any
 * structural pattern sees it, this mode automatically enforces the spec
 * rule that numeric references cannot stand in for structural markers.
 * `&#42;foo&#42;` decodes to literal `*foo*` text and never triggers
 * emphasis; `&#42; foo` decodes to literal `* foo` and never starts a
 * list.
 */
class GfmHtmlEntity extends AbstractMode
{
    /**
     * Returns the fixed sort value (255) of this mode.
     *
     * @inheritdoc
     */
    public function getSort()
    {
        return 255;
    }

    /**
     * Registers HtmlEntity::PATTERN on the lexer as a special pattern for
     * the given mode, under the name 'gfm_html_entity'.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        $this->Lexer->addSpecialPattern(
            HtmlEntity::PATTERN,
            $mode,
            'gfm_html_entity'
        );
    }

    /**
     * Decodes the matched entity reference via HtmlEntity::decodeOne() and
     * records the result as a 'cdata' call at the match position.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Decode at parse time so the renderer only ever sees plain text.
        $decoded = HtmlEntity::decodeOne($match);
        $handler->addCall('cdata', [$decoded], $pos);

        return true;
    }
}