<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\HtmlEntity;

/**
 * Parser mode for GFM HTML entity references.
 *
 * Numeric references (`&#nnn;` and `&#xhhh;`) and HTML5 named
 * references (`&copy;`, `&AElig;`, `&ngE;`, ...) are decoded to the
 * corresponding Unicode codepoint(s) and emitted as cdata.
 *
 * This is distinct from the typography Entity mode, which is
 * renderer-side configurable (entities.conf maps `(c)` to `©` etc.).
 * HTML entity references are not configurable - their meaning is
 * fixed by the HTML5 / Unicode specs - so decoding happens at parse
 * time and the renderer needs no changes.
 *
 * The decoding semantics live in {@see HtmlEntity}; this mode is only
 * a thin wrapper that exposes them to the inline lexer. Whole-span
 * PROTECTED modes (GfmCode, GfmLink, ...) capture their body in one
 * regex shot and bypass this mode, so they call HtmlEntity::decode()
 * directly on the captured slice.
 *
 * The mode is in category SUBSTITION so that it is reachable in every
 * container that allows substitutions (paragraphs, formatting, list
 * items, table cells, headers). Code spans and code blocks live in
 * CATEGORY_PROTECTED and reject SUBSTITION, so entities stay literal
 * there - matching CommonMark's rule that entities are not recognized
 * in code.
 *
 * Side benefit: because this mode consumes the entire entity run
 * before any structural pattern sees it, it automatically enforces
 * the spec rule that numeric references cannot stand in for
 * structural markers. `&#42;foo&#42;` decodes to literal `*foo*` text
 * and never triggers emphasis; `&#42; foo` decodes to literal `* foo`
 * and never starts a list.
 */
class GfmHtmlEntity extends AbstractMode
{
    /**
     * Set up the mode with an empty list of allowed modes.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /**
     * @inheritdoc
     *
     * This mode always reports the fixed sort value 255.
     */
    public function getSort()
    {
        return 255;
    }

    /**
     * @inheritdoc
     *
     * Registers HtmlEntity::PATTERN with the lexer as a special pattern
     * of the given mode, under the mode name 'gfm_html_entity'.
     */
    public function connectTo($mode)
    {
        $this->Lexer->addSpecialPattern(
            HtmlEntity::PATTERN,
            $mode,
            'gfm_html_entity'
        );
    }

    /**
     * @inheritdoc
     *
     * Passes the matched text through HtmlEntity::decodeOne() and adds
     * the result to the handler as a single 'cdata' call at $pos.
     * $state is not used. Always returns true.
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $decoded = HtmlEntity::decodeOne($match);
        $handler->addCall('cdata', [$decoded], $pos);

        return true;
    }
}