<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;

/**
 * GFM inline code span bounded by single backticks: `text`.
 *
 * A backtick span is both monospace-formatted and verbatim: the content
 * is wrapped in monospace_open / monospace_close (the same instructions
 * as DokuWiki's doubled-single-quote pair, rendered as an HTML <code>
 * element) and the body is emitted through the unformatted handler
 * rather than plain cdata, so renderers that distinguish the two
 * (metadata, indexer, non-XHTML backends) treat it as literal.
 *
 * The entry pattern's lookahead only verifies three things: an opener,
 * at least one body character, and a valid closer. It does NOT enforce
 * non-space body edges or a non-space body interior. GFM's edge rules
 * are applied in handle() after the body has been extracted:
 *
 *   1. Line endings become single spaces.
 *   2. If the body both starts and ends with a space (U+0020), and does
 *      not consist entirely of spaces, one space is stripped from each
 *      end.
 *
 * This lets the regex stay small while still producing GFM-correct
 * output for the tricky cases:
 *
 *   ` `      -> <code> </code>   (all-space body, no strip)
 *   ` a`     -> <code> a</code>  (asymmetric edge, no strip)
 *   ` `` `   -> <code>``</code>  (run of 2 inside body, strip)
 *
 * Runs of two or more backticks on either delimiter are rejected by
 * the length-boundary guards (?<!`)...(?!`), so this mode never steals
 * input from GfmBacktickDouble. GfmBacktickDouble extends this class
 * to reuse handle() and normalizeBody().
 *
 * No other inline parsing runs inside a span; allowedModes is empty.
 *
 * @see GfmBacktickDouble
 */
class GfmBacktickSingle extends AbstractMode
{
    /**
     * Set up the mode with an empty list of allowed nested modes.
     */
    public function __construct()
    {
        // Content is literal: no nested inline parsing.
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 165;
    }

    /**
     * The lexer state / mode name. Subclasses override for n>=2.
     *
     * @return string
     */
    protected function getModeName(): string
    {
        return 'gfm_backtick_single';
    }

    /**
     * Entry pattern.
     *
     * The length-boundary guards (?<!`)...(?!`) around each delimiter
     * ensure a run of two or more backticks is never read as an n=1
     * opener or closer. The body character class, which admits either a
     * non-backtick or a run of two-or-more backticks, lets those longer
     * runs live inside the body since they cannot be valid n=1 closers.
     *
     * Only the opening backtick is consumed; body and closer are checked
     * inside a lookahead. NOT_AT_PARA_BREAK is declared outside this file
     * and is applied before every body character (presumably to keep a
     * span from crossing a paragraph break - confirm at its declaration).
     *
     * @return string regex fragment without delimiters
     */
    protected function getEntryPattern(): string
    {
        return '(?<!`)`(?!`)(?='
            . '(?:' . self::NOT_AT_PARA_BREAK . '(?:[^`]|``+))+'
            . '(?<!`)`(?!`)'
            . ')';
    }

    /**
     * Exit pattern. Same boundary guards as the entry.
     *
     * @return string regex fragment without delimiters
     */
    protected function getExitPattern(): string
    {
        return '(?<!`)`(?!`)';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        $this->Lexer->addEntryPattern(
            $this->getEntryPattern(),
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $this->Lexer->addExitPattern($this->getExitPattern(), $this->getModeName());
    }

    /**
     * Translate lexer events into handler calls.
     *
     * Entering the span emits monospace_open, leaving it emits
     * monospace_close, and the text in between is emitted as a single
     * unformatted call carrying the normalized body. Any other state is
     * ignored.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        match ($state) {
            DOKU_LEXER_ENTER => $handler->addCall('monospace_open', [], $pos),
            DOKU_LEXER_EXIT => $handler->addCall('monospace_close', [], $pos),
            DOKU_LEXER_UNMATCHED => $handler->addCall(
                'unformatted',
                [$this->normalizeBody($match)],
                $pos
            ),
            default => true,
        };
        return true;
    }

    /**
     * GFM code-span body normalization.
     *
     * Line endings (CRLF, CR, LF) become single spaces. Afterwards, if
     * the body both begins and ends with a space and does not consist
     * entirely of spaces, exactly one space is removed from each end.
     *
     * Only U+0020 counts as a space here: a body such as " \t " still
     * has content (the tab) and therefore loses its two edge spaces.
     *
     * @param string $body raw text between the backtick delimiters
     * @return string the normalized body
     */
    protected function normalizeBody(string $body): string
    {
        // "\r\n" is listed first so a CRLF pair collapses to one space.
        $body = str_replace(["\r\n", "\r", "\n"], ' ', $body);

        // Restrict trim() to U+0020; the default character list would also
        // swallow tabs, NUL and vertical tabs and misreport such bodies as blank.
        $onlySpaces = trim($body, ' ') === '';

        // A non-blank body with a space on both edges is at least three bytes
        // long, so dropping one byte per side is always safe.
        if (!$onlySpaces && str_starts_with($body, ' ') && str_ends_with($body, ' ')) {
            return substr($body, 1, -1);
        }

        return $body;
    }
}