xref: /dokuwiki/inc/Parsing/ParserMode/GfmBacktickSingle.php (revision 8ed75a23932353c18b43f67323808e9a662f532a)
1*8ed75a23SAndreas Gohr<?php
2*8ed75a23SAndreas Gohr
3*8ed75a23SAndreas Gohrnamespace dokuwiki\Parsing\ParserMode;
4*8ed75a23SAndreas Gohr
5*8ed75a23SAndreas Gohruse dokuwiki\Parsing\Handler;
6*8ed75a23SAndreas Gohr
/**
 * GFM inline code span bounded by single backticks: `text`.
 *
 * A backtick span is both monospace-formatted and verbatim: the content
 * is wrapped in monospace_open / monospace_close (the same instructions
 * as DokuWiki's doubled-single-quote pair, rendered as an HTML <code>
 * element) and the body is emitted through the unformatted handler
 * rather than plain cdata, so renderers that distinguish the two
 * (metadata, indexer, non-XHTML backends) treat it as literal.
 *
 * The entry pattern's lookahead only verifies three things: an opener,
 * at least one body character, and a valid closer. It does NOT enforce
 * non-space body edges or a non-blank body interior. GFM's edge rules
 * are applied in handle() after the body has been extracted:
 *
 *   1. Line endings become single spaces.
 *   2. If the body both starts and ends with a space character (U+0020),
 *      and does not consist entirely of space characters, one space is
 *      stripped from each end. Other whitespace (e.g. a tab) counts as
 *      content for this rule.
 *
 * This lets the regex stay small while still producing GFM-correct
 * output for the tricky cases:
 *
 *   ` `          ->   <code> </code>     (all-space body, no strip)
 *   ` a`         ->   <code> a</code>    (asymmetric edge, no strip)
 *   ` `` `       ->   <code>``</code>    (run of 2 inside body, strip)
 *
 * Runs of two or more backticks on either delimiter are rejected by
 * the length-boundary guards (?<!`)...(?!`), so this mode never steals
 * input from GfmBacktickDouble. GfmBacktickDouble extends this class
 * to reuse handle() and normalizeBody().
 *
 * No other inline parsing runs inside a span; allowedModes is empty.
 *
 * @see GfmBacktickDouble
 */
class GfmBacktickSingle extends AbstractMode
{
    public function __construct()
    {
        // Content is literal — no nested inline parsing.
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 165;
    }

    /**
     * The lexer state / mode name. Subclasses override for n≥2.
     *
     * @return string name under which the entry and exit patterns are registered
     */
    protected function getModeName(): string
    {
        return 'gfm_backtick_single';
    }

    /**
     * Entry pattern. The length-boundary guards (?<!`)...(?!`) around
     * each delimiter ensure a run of two or more backticks is never read
     * as an n=1 opener or closer. The body character class, which admits
     * either a non-backtick or a run of two-or-more backticks, lets
     * those longer runs live inside the body since they cannot be valid
     * n=1 closers.
     *
     * Each body step is prefixed with NOT_AT_PARA_BREAK, a constant that
     * is not defined in this file (presumably inherited; judging by its
     * name it keeps a span from crossing a paragraph break — confirm
     * against its definition).
     *
     * @return string regex fragment: the opening backtick plus a lookahead
     *                requiring a non-empty body and a valid closing backtick
     */
    protected function getEntryPattern(): string
    {
        return '(?<!`)`(?!`)(?='
            . '(?:' . self::NOT_AT_PARA_BREAK . '(?:[^`]|``+))+'
            . '(?<!`)`(?!`)'
            . ')';
    }

    /**
     * Exit pattern. Same boundary guards as the entry.
     *
     * @return string regex fragment matching a lone closing backtick
     */
    protected function getExitPattern(): string
    {
        return '(?<!`)`(?!`)';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        $this->Lexer->addEntryPattern(
            $this->getEntryPattern(),
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $this->Lexer->addExitPattern($this->getExitPattern(), $this->getModeName());
    }

    /**
     * Translate lexer events for this mode into handler calls.
     *
     * Entering and leaving the span emit monospace_open / monospace_close;
     * the text in between is normalized and emitted as a single
     * 'unformatted' call. Any other lexer state is ignored.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        match ($state) {
            DOKU_LEXER_ENTER => $handler->addCall('monospace_open', [], $pos),
            DOKU_LEXER_EXIT => $handler->addCall('monospace_close', [], $pos),
            DOKU_LEXER_UNMATCHED => $handler->addCall(
                'unformatted',
                [$this->normalizeBody($match)],
                $pos
            ),
            // Nothing to emit for states this mode does not produce content for.
            default => true,
        };
        return true;
    }

    /**
     * GFM code-span body normalization: line endings become spaces; if
     * both ends are space characters and the body does not consist
     * entirely of space characters, strip one space from each end.
     *
     * Only U+0020 counts as a "space" here. A body such as space-TAB-space
     * therefore loses its two padding spaces and keeps the tab.
     *
     * @param string $body raw text found between the delimiters
     * @return string the normalized body
     */
    protected function normalizeBody(string $body): string
    {
        // "\r\n" is listed first so a Windows line ending yields one space, not two.
        $body = str_replace(["\r\n", "\r", "\n"], ' ', $body);

        // trim() is restricted to ' ' on purpose: its default character list
        // would also swallow tabs, vertical tabs and NUL bytes, making bodies
        // that contain only those (plus spaces) look "blank" and skipping the
        // strip that the spec requires for them.
        $isPadded = str_starts_with($body, ' ')
            && str_ends_with($body, ' ')
            && trim($body, ' ') !== '';

        return $isPadded ? substr($body, 1, -1) : $body;
    }
}
134