<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\HtmlEntity;

/**
 * Parser mode for GFM HTML entity references.
 *
 * Numeric references (`&#nnn;` and `&#xhhh;`) and HTML5 named references
 * (`&copy;`, `&AElig;`, `&ngE;`, ...) are decoded to the corresponding
 * Unicode codepoint(s) and emitted as cdata.
 *
 * This is not the typography Entity mode. That mode is configurable on
 * the renderer side (entities.conf maps `(c)` to the copyright sign,
 * etc.). The meaning of an HTML entity reference is fixed by the
 * HTML5 / Unicode specs and is not configurable, so decoding happens at
 * parse time and the renderer needs no changes.
 *
 * The decoding rules themselves live in {@see HtmlEntity}; this class is
 * only a thin adapter that makes them available to the inline lexer.
 * Whole-span PROTECTED modes (GfmCode, GfmLink, ...) capture their body
 * with a single regex and never reach this mode, so they call
 * HtmlEntity::decode() directly on the captured slice.
 *
 * The mode belongs to category SUBSTITUTION, which makes it reachable in
 * every container that allows substitutions (paragraphs, formatting,
 * list items, table cells, headers). Code spans and code blocks live in
 * CATEGORY_PROTECTED and reject SUBSTITUTION, so entities stay literal
 * there - matching CommonMark's rule that entities are not recognized
 * in code.
 *
 * Side benefit: because the entire entity run is consumed before any
 * structural pattern sees it, this mode automatically enforces the spec
 * rule that numeric references cannot stand in for structural markers.
 * `&#42;foo&#42;` decodes to the literal text `*foo*` and never triggers
 * emphasis; `&#42; foo` decodes to the literal text `* foo` and never
 * starts a list.
 */
class GfmHtmlEntity extends AbstractMode
{
    /**
     * Set up the mode with an empty list of allowed modes.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 255;
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        // Name under which the entity pattern is registered with the lexer.
        $lexerMode = 'gfm_html_entity';

        $this->Lexer->addSpecialPattern(HtmlEntity::PATTERN, $mode, $lexerMode);
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Decode the single matched reference and hand it on as plain cdata.
        $decoded = HtmlEntity::decodeOne($match);
        $handler->addCall('cdata', [$decoded], $pos);

        return true;
    }
}