<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\Escape;

/**
 * GFM backslash escapes: a backslash before any ASCII punctuation
 * character produces the literal punctuation character; the backslash
 * itself is consumed and the following char loses any markup meaning.
 *
 * Backslashes before any other character (letters, digits, multibyte,
 * spaces, tabs, newlines) are NOT escapes — those sequences stay
 * literal because the pattern doesn't match them and the lexer leaves
 * them as cdata.
 *
 * Sort 5 places this mode ahead of every other inline mode so that
 * leftmost-then-priority resolution claims `\X` before any competing
 * delimiter (emphasis `*`, heading `#`, link `[`, …) can match the
 * unescaped char.
 *
 * Category SUBSTITUTION (alongside Smiley and Entity) so the mode is
 * reachable everywhere those run: inside paragraphs, formatting
 * modes (emphasis, strong, deleted), list items, table cells, headers
 * — every container whose allowedModes include SUBSTITUTION. Whole-span
 * code modes (GfmCode, GfmFile, GfmBacktickSingle, GfmBacktickDouble)
 * capture their entire body in one regex shot and therefore bypass
 * GfmEscape on their content — matching GFM's rule that escapes don't
 * fire inside code blocks or code spans.
 *
 * Modes that capture a literal string and need GFM unescape applied
 * post-hoc (link URL/label, fence info string) call
 * {@see \dokuwiki\Parsing\Helpers\Escape::unescapeBackslashes()} from
 * their handle() — same character class.
 *
 * Collision with DokuWiki's Linebreak mode (`\\` before a space, tab,
 * or newline): both patterns can claim the two backslashes at the same
 * position. GfmEscape's sort 5 beats Linebreak's sort 140 on tie, which
 * would silently swallow every DW forced linebreak in mixed-syntax
 * settings. To avoid that, when DW syntax is loaded the pattern carries
 * a negative lookahead that declines `\\` followed by `[ \t\n]` —
 * deferring those bytes to Linebreak. Mid-line `\\` (e.g. UNC paths
 * like `\\\\host\\share`) still escapes normally; only the EOL-adjacent
 * form is handed off. In pure `md` mode no DW Linebreak is loaded and
 * the lookahead is omitted so GFM-spec behavior is preserved.
 */
class GfmEscape extends AbstractMode
{
    /**
     * An escape is a leaf token: no other mode may nest inside it.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 5;
    }

    /**
     * Register the escape pattern (backslash + punctuation char) as a
     * special pattern of the given mode.
     *
     * Unless the configured syntax is exactly 'md', the pattern is
     * prefixed with a negative lookahead that refuses to match at a
     * double backslash followed by space, tab or newline.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        global $conf;

        // A missing 'syntax' setting is treated like any non-'md' value
        // (the lookahead stays in place), without raising an
        // undefined-key warning.
        $pureMarkdown = isset($conf['syntax']) && $conf['syntax'] === 'md';

        // PHP `\\\\\\\\` → regex `\\\\` → matches two literal backslashes.
        $lookahead = $pureMarkdown ? '' : '(?!\\\\\\\\[ \t\n])';

        $this->Lexer->addSpecialPattern(
            // PHP `\\\\` → regex `\\` → one literal backslash, then the class.
            $lookahead . '\\\\' . Escape::PUNCTUATION_CHAR_CLASS,
            $mode,
            'gfm_escape'
        );
    }

    /**
     * Emit the matched text minus its first byte as plain cdata.
     *
     * Given the pattern registered in connectTo(), that first byte is
     * the escaping backslash.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $handler->addCall('cdata', [substr($match, 1)], $pos);
        return true;
    }
}