xref: /dokuwiki/inc/Parsing/ParserMode/GfmBacktickSingle.php (revision 8ed75a23932353c18b43f67323808e9a662f532a)
1<?php
2
3namespace dokuwiki\Parsing\ParserMode;
4
5use dokuwiki\Parsing\Handler;
6
/**
 * GFM inline code span bounded by single backticks: `text`.
 *
 * A backtick span is both monospace-formatted and verbatim: the content
 * is wrapped in monospace_open / monospace_close (the same instructions
 * as DokuWiki's doubled-single-quote pair, rendered as an HTML <code>
 * element) and the body is emitted through the unformatted handler
 * rather than plain cdata, so renderers that distinguish the two
 * (metadata, indexer, non-XHTML backends) treat it as literal.
 *
 * The entry pattern's lookahead only verifies three things: an opener,
 * at least one body character, and a valid closer. It does NOT look at
 * what the body's edges or interior consist of. GFM's edge rules are
 * applied in handle() after the body has been extracted:
 *
 *   1. Line endings become single spaces.
 *   2. If the body both starts and ends with a space (U+0020), and is
 *      not made up solely of spaces, one space is stripped from each
 *      end. Tabs and other whitespace are ordinary content here.
 *
 * This lets the regex stay small while still producing GFM-correct
 * output for the tricky cases:
 *
 *   ` `          ->   <code> </code>     (all-space body, no strip)
 *   ` a`         ->   <code> a</code>    (asymmetric edge, no strip)
 *   ` `` `       ->   <code>``</code>    (run of 2 inside body, strip)
 *
 * Runs of two or more backticks on either delimiter are rejected by
 * the length-boundary guards (?<!`)...(?!`), so this mode never steals
 * input from GfmBacktickDouble. GfmBacktickDouble extends this class
 * to reuse handle() and normalizeBody().
 *
 * No other inline parsing runs inside a span; allowedModes is empty.
 *
 * @see GfmBacktickDouble
 */
class GfmBacktickSingle extends AbstractMode
{
    /**
     * Set up the mode with an empty list of nested modes, because the
     * span content is literal and must not be parsed any further.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 165;
    }

    /**
     * The lexer state / mode name. Subclasses override for n≥2.
     *
     * @return string name used for both the entry and the exit pattern
     */
    protected function getModeName(): string
    {
        return 'gfm_backtick_single';
    }

    /**
     * Entry pattern. The length-boundary guards (?<!`)...(?!`) around
     * each delimiter ensure a run of two or more backticks is never read
     * as an n=1 opener or closer. The body alternative, which admits
     * either a non-backtick or a run of two-or-more backticks, lets
     * those longer runs live inside the body since they cannot be valid
     * n=1 closers.
     *
     * Only the opening backtick is consumed; body and closer are merely
     * verified inside a lookahead.
     *
     * @return string regular expression fragment without delimiters
     */
    protected function getEntryPattern(): string
    {
        $delimiter = '(?<!`)`(?!`)';
        // Each body unit is additionally guarded by NOT_AT_PARA_BREAK.
        $bodyUnit = '(?:' . self::NOT_AT_PARA_BREAK . '(?:[^`]|``+))';

        return $delimiter . '(?=' . $bodyUnit . '+' . $delimiter . ')';
    }

    /**
     * Exit pattern. Same boundary guards as the entry.
     *
     * @return string regular expression fragment without delimiters
     */
    protected function getExitPattern(): string
    {
        return '(?<!`)`(?!`)';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        $this->Lexer->addEntryPattern(
            $this->getEntryPattern(),
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $this->Lexer->addExitPattern(
            $this->getExitPattern(),
            $this->getModeName()
        );
    }

    /**
     * Translate lexer events into handler calls: the delimiters open and
     * close a monospace section, the body in between is normalized and
     * emitted as unformatted text. Any other state is ignored.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        match ($state) {
            DOKU_LEXER_ENTER => $handler->addCall('monospace_open', [], $pos),
            DOKU_LEXER_EXIT => $handler->addCall('monospace_close', [], $pos),
            DOKU_LEXER_UNMATCHED => $handler->addCall(
                'unformatted',
                [$this->normalizeBody($match)],
                $pos
            ),
            // Nothing to emit for any other lexer state.
            default => null,
        };

        return true;
    }

    /**
     * GFM code-span body normalization: line endings become spaces; if
     * the result both begins and ends with a space and does not consist
     * entirely of spaces, one space is stripped from each end.
     *
     * "Space" means U+0020 only. A body such as space-tab-space
     * therefore loses its padding spaces and keeps the tab.
     *
     * @param string $body raw text found between the delimiters
     * @return string the normalized body
     */
    protected function normalizeBody(string $body): string
    {
        // "\r\n" is listed first so a CRLF pair collapses to one space.
        $body = str_replace(["\r\n", "\r", "\n"], ' ', $body);

        // trim() gets an explicit character list: the default list would
        // also swallow tabs, NUL and vertical tabs and wrongly classify
        // e.g. " \t " as a blank body. A non-empty result here together
        // with a space on both edges implies a length of at least three,
        // so the substr() below always has something left to return.
        if (
            str_starts_with($body, ' ')
            && str_ends_with($body, ' ')
            && trim($body, ' ') !== ''
        ) {
            return substr($body, 1, -1);
        }

        return $body;
    }
}
134