xref: /dokuwiki/inc/Parsing/ParserMode/GfmBacktickSingle.php (revision b73ece99c18919754d993a1d1f5cb27140555705)
1<?php
2
3namespace dokuwiki\Parsing\ParserMode;
4
5use dokuwiki\Parsing\Handler;
6
/**
 * GFM inline code span bounded by single backticks: `text`.
 *
 * A backtick span is both monospace-formatted and verbatim: the content
 * is wrapped in monospace_open / monospace_close (the same instructions
 * as DokuWiki's doubled-single-quote pair, rendered as an HTML <code>
 * element) and the body is emitted through the unformatted handler
 * rather than plain cdata, so renderers that distinguish the two
 * (metadata, indexer, non-XHTML backends) treat it as literal.
 *
 * The entry pattern's lookahead only verifies three things: an opener,
 * at least one body character, and a valid closer. It does NOT look at
 * the whitespace at the body edges. GFM's edge rules are applied in
 * handle() after the body has been extracted:
 *
 *   1. Line endings become single spaces.
 *   2. If the body both starts and ends with a space character (U+0020)
 *      and does not consist entirely of space characters, one space is
 *      stripped from each end.
 *
 * This lets the regex stay small while still producing GFM-correct
 * output for the tricky cases:
 *
 *   ` `          ->   <code> </code>     (all-space body, no strip)
 *   ` a`         ->   <code> a</code>    (asymmetric edge, no strip)
 *   ` `` `       ->   <code>``</code>    (run of 2 inside body, strip)
 *
 * Runs of two or more backticks on either delimiter are rejected by
 * the length-boundary guards (?<!`)...(?!`), so this mode never steals
 * input from GfmBacktickDouble. GfmBacktickDouble extends this class
 * to reuse handle() and normalizeBody().
 *
 * No other inline parsing runs inside a span; this mode registers only
 * its own entry and exit patterns.
 *
 * @see GfmBacktickDouble
 */
class GfmBacktickSingle extends AbstractMode
{
    /** @inheritdoc */
    public function getSort()
    {
        return 165;
    }

    /**
     * The lexer state / mode name. Subclasses override for n≥2.
     *
     * @return string name used both when registering the entry pattern
     *                and when registering the exit pattern
     */
    protected function getModeName(): string
    {
        return 'gfm_backtick_single';
    }

    /**
     * Entry pattern: a lone backtick followed (lookahead only) by a
     * non-empty body and a lone closing backtick.
     *
     * The length-boundary guards (?<!`)...(?!`) around each delimiter
     * ensure a run of two or more backticks is never read as an n=1
     * opener or closer. The body alternative admits either a single
     * non-backtick character or a whole run of two-or-more backticks;
     * such runs may live inside the body because they can never be a
     * valid n=1 closer.
     *
     * The run is matched possessively (``++). A closer is by definition
     * not adjacent to another backtick, so a run must always be consumed
     * as a whole and giving characters back can never produce a match.
     * Without the possessive quantifier the repeated group could split
     * a long run into pieces of length >= 2 in exponentially many ways,
     * and an unterminated span followed by a long backtick run would
     * exhaust PCRE's backtrack limit instead of simply not matching.
     *
     * @return string regex fragment handed to the lexer
     */
    protected function getEntryPattern(): string
    {
        $lone = '(?<!`)`(?!`)';
        $bodyUnit = self::NOT_AT_PARA_BREAK . '(?:[^`]|``++)';

        return $lone . '(?=(?:' . $bodyUnit . ')+' . $lone . ')';
    }

    /**
     * Exit pattern. Same boundary guards as the entry.
     *
     * @return string regex fragment handed to the lexer
     */
    protected function getExitPattern(): string
    {
        return '(?<!`)`(?!`)';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        $this->Lexer->addEntryPattern(
            $this->getEntryPattern(),
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $this->Lexer->addExitPattern($this->getExitPattern(), $this->getModeName());
    }

    /**
     * Translate lexer events for this mode into handler calls.
     *
     * Entering and leaving the span emit monospace_open and
     * monospace_close; the text in between is normalized and emitted
     * as an unformatted call. Any other lexer state is ignored.
     *
     * @param string $match the text the lexer matched
     * @param int $state one of the DOKU_LEXER_* state constants
     * @param int $pos position of the match, passed through to the call
     * @param Handler $handler receives the generated calls
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        match ($state) {
            DOKU_LEXER_ENTER => $handler->addCall('monospace_open', [], $pos),
            DOKU_LEXER_EXIT => $handler->addCall('monospace_close', [], $pos),
            DOKU_LEXER_UNMATCHED => $handler->addCall(
                'unformatted',
                [$this->normalizeBody($match)],
                $pos
            ),
            default => true,
        };
        return true;
    }

    /**
     * GFM code-span body normalization.
     *
     * Line endings become single spaces. Afterwards, if the body both
     * begins and ends with a space character and does not consist
     * entirely of space characters, one space is removed from each end.
     *
     * Only U+0020 counts here, matching the GFM wording: a body such as
     * space-tab-space is therefore unwrapped to a single tab, while a
     * body made of nothing but spaces is left untouched.
     *
     * @param string $body raw text found between the delimiters
     * @return string the normalized body
     */
    protected function normalizeBody(string $body): string
    {
        $body = str_replace(["\r\n", "\r", "\n"], ' ', $body);

        $paddedBothEnds = str_starts_with($body, ' ') && str_ends_with($body, ' ');

        // The explicit ' ' mask matters: default trim() would also eat tabs,
        // NUL and vertical tabs and wrongly classify such bodies as blank.
        // A padded body with a non-space character is at least three bytes
        // long, so no separate length check is needed before substr().
        if ($paddedBothEnds && trim($body, ' ') !== '') {
            return substr($body, 1, -1);
        }

        return $body;
    }
}
129