xref: /dokuwiki/inc/Parsing/ParserMode/GfmEscape.php (revision 74031e463764923581b9204cebc0fc3f34ce881f)
1*74031e46SAndreas Gohr<?php
2*74031e46SAndreas Gohr
3*74031e46SAndreas Gohrnamespace dokuwiki\Parsing\ParserMode;
4*74031e46SAndreas Gohr
5*74031e46SAndreas Gohruse dokuwiki\Parsing\Handler;
6*74031e46SAndreas Gohruse dokuwiki\Parsing\Helpers\Escape;
7*74031e46SAndreas Gohr
8*74031e46SAndreas Gohr/**
9*74031e46SAndreas Gohr * GFM backslash escapes: a backslash before any ASCII punctuation
10*74031e46SAndreas Gohr * character produces the literal punctuation character; the backslash
11*74031e46SAndreas Gohr * itself is consumed and the following char loses any markup meaning.
12*74031e46SAndreas Gohr *
13*74031e46SAndreas Gohr * Backslashes before any other character (letters, digits, multibyte,
14*74031e46SAndreas Gohr * spaces, tabs, newlines) are NOT escapes — those sequences stay
15*74031e46SAndreas Gohr * literal because the pattern doesn't match them and the lexer leaves
16*74031e46SAndreas Gohr * them as cdata.
17*74031e46SAndreas Gohr *
18*74031e46SAndreas Gohr * Sort 5 places this mode ahead of every other inline mode so that
19*74031e46SAndreas Gohr * leftmost-then-priority resolution claims `\X` before any competing
20*74031e46SAndreas Gohr * delimiter (emphasis `*`, heading `#`, link `[`, …) can match the
21*74031e46SAndreas Gohr * unescaped char.
22*74031e46SAndreas Gohr *
23*74031e46SAndreas Gohr * Category SUBSTITION (alongside Smiley and Entity) so the mode is
24*74031e46SAndreas Gohr * reachable everywhere those run: inside paragraphs, formatting
25*74031e46SAndreas Gohr * modes (emphasis, strong, deleted), list items, table cells, headers
26*74031e46SAndreas Gohr * — every container whose allowedModes include SUBSTITION. Whole-span
27*74031e46SAndreas Gohr * code modes (GfmCode, GfmFile, GfmBacktickSingle, GfmBacktickDouble)
28*74031e46SAndreas Gohr * capture their entire body in one regex shot and therefore bypass
29*74031e46SAndreas Gohr * GfmEscape on their content — matching GFM's rule that escapes don't
30*74031e46SAndreas Gohr * fire inside code blocks or code spans.
31*74031e46SAndreas Gohr *
32*74031e46SAndreas Gohr * Modes that capture a literal string and need GFM unescape applied
33*74031e46SAndreas Gohr * post-hoc (link URL/label, fence info string) call
34*74031e46SAndreas Gohr * {@see \dokuwiki\Parsing\Helpers\Escape::unescapeBackslashes()} from
35*74031e46SAndreas Gohr * their handle() — same character class.
36*74031e46SAndreas Gohr */
class GfmEscape extends AbstractMode
{
    /**
     * Set up the mode with an empty list of allowed nested modes.
     */
    public function __construct()
    {
        // Nothing may be nested inside an escape sequence
        $this->allowedModes = [];
    }

    /**
     * Sort value of this mode.
     *
     * @inheritdoc
     * @return int always 5
     */
    public function getSort()
    {
        return 5;
    }

    /**
     * Register the escape pattern with the lexer for the given mode.
     *
     * The pattern is a literal backslash followed by
     * Escape::PUNCTUATION_CHAR_CLASS; matches are reported under the
     * name 'gfm_escape'.
     *
     * @inheritdoc
     * @param string $mode name of the mode the pattern is attached to
     */
    public function connectTo($mode)
    {
        // '\\\\' is two backslash characters in PHP, i.e. one escaped
        // (literal) backslash once it reaches the regex engine
        $escapePattern = '\\\\' . Escape::PUNCTUATION_CHAR_CLASS;

        $this->Lexer->addSpecialPattern($escapePattern, $mode, 'gfm_escape');
    }

    /**
     * Emit the escaped character as plain character data.
     *
     * Everything after the first byte of the match (the backslash) is
     * passed on as a 'cdata' call.
     *
     * @inheritdoc
     * @param string $match the text matched by the escape pattern
     * @param int $state lexer state (not used here)
     * @param int $pos position of the match, forwarded to the handler
     * @param Handler $handler receives the resulting 'cdata' call
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        // Strip the leading backslash, keep what follows it
        $literal = substr($match, 1);

        $handler->addCall('cdata', [$literal], $pos);

        return true;
    }
}
67