xref: /dokuwiki/inc/Parsing/ParserMode/GfmEscape.php (revision d331a8396503a69ec91cd77124b1b8983c251c54)
174031e46SAndreas Gohr<?php
274031e46SAndreas Gohr
374031e46SAndreas Gohrnamespace dokuwiki\Parsing\ParserMode;
474031e46SAndreas Gohr
574031e46SAndreas Gohruse dokuwiki\Parsing\Handler;
674031e46SAndreas Gohruse dokuwiki\Parsing\Helpers\Escape;
774031e46SAndreas Gohr
/**
 * Inline parser mode for GFM backslash escapes.
 *
 * A backslash directly followed by an ASCII punctuation character is an
 * escape: the backslash is consumed and the punctuation character is emitted
 * as a literal, stripped of any markup meaning it would otherwise carry.
 *
 * A backslash in front of anything else (letters, digits, multibyte
 * characters, spaces, tabs, newlines) is NOT an escape. The pattern does not
 * match such sequences, so the lexer leaves them untouched as cdata.
 *
 * Sort 5 puts this mode ahead of every other inline mode. Under
 * leftmost-then-priority resolution, `\X` is therefore claimed before a
 * competing delimiter (emphasis `*`, heading `#`, link `[`, …) gets a chance
 * to match the unescaped character.
 *
 * The mode lives in category SUBSTITUTION (alongside Smiley and Entity),
 * which makes it reachable wherever those modes run: inside paragraphs,
 * formatting modes (emphasis, strong, deleted), list items, table cells,
 * headers — every container whose allowedModes include SUBSTITUTION.
 * Whole-span code modes (GfmCode, GfmFile, GfmBacktickSingle,
 * GfmBacktickDouble) capture their entire body in a single regex match and
 * therefore bypass GfmEscape on their content, in line with GFM's rule that
 * escapes do not fire inside code blocks or code spans.
 *
 * Modes that capture a literal string and need the GFM unescape applied
 * afterwards (link URL/label, fence info string) call
 * {@see \dokuwiki\Parsing\Helpers\Escape::unescapeBackslashes()} from their
 * handle() method; it operates on the same character class.
 */
class GfmEscape extends AbstractMode
{
    /**
     * Leaves the list of modes allowed inside this mode empty.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /**
     * Sort position of this mode; see the class description for why it is 5.
     *
     * @inheritdoc
     */
    public function getSort()
    {
        return 5;
    }

    /**
     * Registers the escape pattern as a special pattern of the given mode.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // '\\\\' is the regex source `\\`, i.e. one literal backslash; the
        // shared character class restricts what may follow it.
        $escapePattern = '\\\\' . Escape::PUNCTUATION_CHAR_CLASS;

        $this->Lexer->addSpecialPattern($escapePattern, $mode, 'gfm_escape');
    }

    /**
     * Emits the escaped character as plain cdata, without its backslash.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // The match is a backslash plus the escaped character; skipping the
        // first byte removes the backslash and keeps the rest.
        $literal = substr($match, 1);

        $handler->addCall('cdata', [$literal], $pos);

        return true;
    }
}
67