xref: /dokuwiki/inc/Parsing/ParserMode/GfmEscape.php (revision 47a02a102092be9e1e6f1ddaf158bdfffdb13d4f)
174031e46SAndreas Gohr<?php
274031e46SAndreas Gohr
374031e46SAndreas Gohrnamespace dokuwiki\Parsing\ParserMode;
474031e46SAndreas Gohr
574031e46SAndreas Gohruse dokuwiki\Parsing\Handler;
674031e46SAndreas Gohruse dokuwiki\Parsing\Helpers\Escape;
774031e46SAndreas Gohr
/**
 * GFM backslash escapes: a backslash before any ASCII punctuation
 * character produces the literal punctuation character; the backslash
 * itself is consumed and the following char loses any markup meaning.
 *
 * Backslashes before any other character (letters, digits, multibyte,
 * spaces, tabs, newlines) are NOT escapes — those sequences stay
 * literal because the pattern doesn't match them and the lexer leaves
 * them as cdata.
 *
 * Sort 5 places this mode ahead of every other inline mode so that
 * leftmost-then-priority resolution claims `\X` before any competing
 * delimiter (emphasis `*`, heading `#`, link `[`, …) can match the
 * unescaped char.
 *
 * Category SUBSTITUTION (alongside Smiley and Entity) so the mode is
 * reachable everywhere those run: inside paragraphs, formatting
 * modes (emphasis, strong, deleted), list items, table cells, headers
 * — every container whose allowedModes include SUBSTITUTION. Whole-span
 * code modes (GfmCode, GfmFile, GfmBacktickSingle, GfmBacktickDouble)
 * capture their entire body in one regex shot and therefore bypass
 * GfmEscape on their content — matching GFM's rule that escapes don't
 * fire inside code blocks or code spans.
 *
 * Modes that capture a literal string and need GFM unescape applied
 * post-hoc (link URL/label, fence info string) call
 * {@see \dokuwiki\Parsing\Helpers\Escape::unescapeBackslashes()} from
 * their handle() — same character class.
 *
 * Collision with DokuWiki's Linebreak mode (`\\` before a space, tab,
 * or newline): both patterns can claim the two backslashes at the same
 * position. GfmEscape's sort 5 beats Linebreak's sort 140 on tie, which
 * would silently swallow every DW forced linebreak in mixed-syntax
 * settings. To avoid that, when DW syntax is loaded the pattern carries
 * a negative lookahead that declines `\\` followed by `[ \t\n]` —
 * deferring those bytes to Linebreak. Mid-line `\\` (e.g. UNC paths
 * like `\\\\host\\share`) still escapes normally; only the EOL-adjacent
 * form is handed off. In pure `md` mode no DW Linebreak is loaded and
 * the lookahead is omitted so GFM-spec behavior is preserved.
 */
class GfmEscape extends AbstractMode
{
    /**
     * Priority of this mode relative to other parser modes.
     *
     * @inheritdoc
     */
    public function getSort()
    {
        return 5;
    }

    /**
     * Register the backslash-escape pattern as a special pattern on the
     * lexer for the given containing mode.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        if ($this->registry->getSyntax() === 'md') {
            // Pure Markdown syntax: no guard is prepended to the pattern.
            $guard = '';
        } else {
            // Negative lookahead: refuse to match when the input at this
            // position is two backslashes followed by space, tab or newline.
            // PHP `\\\\\\\\` → regex `\\\\` → two literal backslashes.
            $guard = '(?!\\\\\\\\[ \t\n])';
        }

        // PHP `\\\\` → regex `\\` → one literal backslash, followed by the
        // punctuation character class shared with the Escape helper.
        $pattern = $guard . '\\\\' . Escape::PUNCTUATION_CHAR_CLASS;

        $this->Lexer->addSpecialPattern($pattern, $mode, 'gfm_escape');
    }

    /**
     * Emit the escaped character as plain cdata.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Drop the first byte of the match (the leading backslash) and
        // keep whatever follows as literal text.
        $literal = substr($match, 1);
        $handler->addCall('cdata', [$literal], $pos);

        return true;
    }
}
75