xref: /dokuwiki/inc/Parsing/ParserMode/GfmBacktickSingle.php (revision e7dae73bcd947f44c901faaac9dd45de67633a3b)
18ed75a23SAndreas Gohr<?php
28ed75a23SAndreas Gohr
38ed75a23SAndreas Gohrnamespace dokuwiki\Parsing\ParserMode;
48ed75a23SAndreas Gohr
58ed75a23SAndreas Gohruse dokuwiki\Parsing\Handler;
68ed75a23SAndreas Gohr
/**
 * GFM inline code span bounded by single backticks: `text`.
 *
 * A backtick span is both monospace-formatted and verbatim: the content
 * is wrapped in monospace_open / monospace_close (the same instructions
 * as DokuWiki's doubled-single-quote pair, rendered as an HTML <code>
 * element) and the body is emitted through the unformatted handler
 * rather than plain cdata, so renderers that distinguish the two
 * (metadata, indexer, non-XHTML backends) treat it as literal.
 *
 * The entry pattern's lookahead only verifies three things: an opener,
 * at least one body character, and a valid closer. It does NOT enforce
 * non-whitespace body edges or a non-whitespace body interior. GFM's
 * edge rules are applied in handle() after the body has been extracted:
 *
 *   1. Line endings become single spaces.
 *   2. If the body both starts and ends with a space, and does not
 *      consist entirely of spaces, one space is stripped from each end.
 *
 * "Space" in rule 2 means U+0020 only, as in the GFM spec: a body that
 * holds nothing but spaces and, say, a tab still gets its edges stripped.
 *
 * This lets the regex stay small while still producing GFM-correct
 * output for the tricky cases:
 *
 *   ` `          ->   <code> </code>     (all-space body, no strip)
 *   ` a`         ->   <code> a</code>    (asymmetric edge, no strip)
 *   ` `` `       ->   <code>``</code>    (run of 2 inside body, strip)
 *
 * Runs of two or more backticks on either delimiter are rejected by
 * the length-boundary guards (?<!`)...(?!`), so this mode never steals
 * input from GfmBacktickDouble. GfmBacktickDouble extends this class
 * to reuse handle() and normalizeBody().
 *
 * No other inline parsing runs inside a span; allowedModes is empty.
 *
 * @see GfmBacktickDouble
 */
class GfmBacktickSingle extends AbstractMode
{
    public function __construct()
    {
        // Content is literal — no nested inline parsing.
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 165;
    }

    /** The lexer state / mode name. Subclasses override for n≥2. */
    protected function getModeName(): string
    {
        return 'gfm_backtick_single';
    }

    /**
     * Entry pattern. The length-boundary guards (?<!`)...(?!`) around
     * each delimiter ensure a run of two or more backticks is never read
     * as an n=1 opener or closer. The body character class, which admits
     * either a non-backtick or a run of two-or-more backticks, lets
     * those longer runs live inside the body since they cannot be valid
     * n=1 closers.
     *
     * Only the opening backtick is consumed; the body and the closer are
     * checked inside a lookahead. Every body character is additionally
     * guarded by NOT_AT_PARA_BREAK, a constant defined outside this file
     * (presumably it stops a span from crossing a paragraph break —
     * confirm against its definition).
     *
     * @return string regex fragment handed to the lexer in connectTo()
     */
    protected function getEntryPattern(): string
    {
        return '(?<!`)`(?!`)(?='
            . '(?:' . self::NOT_AT_PARA_BREAK . '(?:[^`]|``+))+'
            . '(?<!`)`(?!`)'
            . ')';
    }

    /**
     * Exit pattern. Same boundary guards as the entry.
     *
     * @return string regex fragment handed to the lexer in postConnect()
     */
    protected function getExitPattern(): string
    {
        return '(?<!`)`(?!`)';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        $this->Lexer->addEntryPattern(
            $this->getEntryPattern(),
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $this->Lexer->addExitPattern($this->getExitPattern(), $this->getModeName());
    }

    /**
     * Translate lexer tokens of this mode into handler instructions.
     *
     * Entering the mode emits monospace_open, leaving it emits
     * monospace_close, and the text in between is emitted as a single
     * unformatted call carrying the normalized body. Any other lexer
     * state is ignored.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        match ($state) {
            DOKU_LEXER_ENTER => $handler->addCall('monospace_open', [], $pos),
            DOKU_LEXER_EXIT => $handler->addCall('monospace_close', [], $pos),
            DOKU_LEXER_UNMATCHED => $handler->addCall(
                'unformatted',
                [$this->normalizeBody($match)],
                $pos
            ),
            default => true,
        };
        return true;
    }

    /**
     * GFM code-span body normalization.
     *
     * Line endings (CRLF, CR, LF) each become one space. Afterwards, if
     * the body both begins and ends with a space and contains at least
     * one character that is not a space, exactly one space is removed
     * from each end. Only U+0020 counts as a space here, matching the
     * GFM spec: a tab is content, so " \t " is stripped down to "\t".
     *
     * @param string $body raw text between the delimiters
     * @return string the normalized text
     */
    protected function normalizeBody(string $body): string
    {
        $body = str_replace(["\r\n", "\r", "\n"], ' ', $body);

        // trim() is limited to ' ' on purpose: the default character list
        // would also swallow tabs and wrongly classify " \t " as blank.
        $hasNonSpace = trim($body, ' ') !== '';

        if ($hasNonSpace && str_starts_with($body, ' ') && str_ends_with($body, ' ')) {
            // A non-space character sits between the two edge spaces, so
            // the body is at least three bytes long and substr() is safe.
            $body = substr($body, 1, -1);
        }

        return $body;
    }
}
135