<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;

/**
 * GFM inline code span bounded by single backticks: `text`.
 *
 * A backtick span is both monospace-formatted and verbatim: the content
 * is wrapped in monospace_open / monospace_close (the same instructions
 * as DokuWiki's doubled-single-quote pair, rendered as an HTML <code>
 * element) and the body is emitted through the unformatted handler
 * rather than plain cdata, so renderers that distinguish the two
 * (metadata, indexer, non-XHTML backends) treat it as literal.
 *
 * The entry pattern's lookahead only verifies three things: an opener,
 * at least one body character, and a valid closer. It does NOT enforce
 * non-whitespace body edges or a non-whitespace body interior. GFM's
 * edge rules are applied in handle() after the body has been extracted:
 *
 *   1. Line endings become single spaces.
 *   2. If the body both starts and ends with a space (U+0020), and does
 *      not consist entirely of spaces, one space is stripped from each
 *      end.
 *
 * This lets the regex stay small while still producing GFM-correct
 * output for the tricky cases:
 *
 *   ` `      -> <code> </code>   (all-space body, no strip)
 *   ` a`     -> <code> a</code>  (asymmetric edge, no strip)
 *   ` `` `   -> <code>``</code>  (run of 2 inside body, strip)
 *
 * Runs of two or more backticks on either delimiter are rejected by
 * the length-boundary guards (?<!`)...(?!`), so this mode never steals
 * input from GfmBacktickDouble. GfmBacktickDouble extends this class
 * to reuse handle() and normalizeBody().
 *
 * No other inline parsing runs inside a span; allowedModes is empty.
 *
 * @see GfmBacktickDouble
 */
class GfmBacktickSingle extends AbstractMode
{
    /** @inheritdoc */
    public function getSort()
    {
        return 165;
    }

    /**
     * The lexer state / mode name. Subclasses override for n >= 2.
     *
     * @return string name used both when registering the entry pattern
     *                and when attaching the exit pattern
     */
    protected function getModeName(): string
    {
        return 'gfm_backtick_single';
    }

    /**
     * Entry pattern. The length-boundary guards (?<!`)...(?!`) around
     * each delimiter ensure a run of two or more backticks is never read
     * as an n=1 opener or closer. The body character class, which admits
     * either a non-backtick or a run of two-or-more backticks, lets
     * those longer runs live inside the body since they cannot be valid
     * n=1 closers.
     *
     * @return string regex fragment (no delimiters) handed to the lexer
     */
    protected function getEntryPattern(): string
    {
        return '(?<!`)`(?!`)(?='
            . '(?:' . self::NOT_AT_PARA_BREAK . '(?:[^`]|``+))+'
            . '(?<!`)`(?!`)'
            . ')';
    }

    /**
     * Exit pattern. Same boundary guards as the entry.
     *
     * @return string regex fragment (no delimiters) handed to the lexer
     */
    protected function getExitPattern(): string
    {
        return '(?<!`)`(?!`)';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        $this->Lexer->addEntryPattern(
            $this->getEntryPattern(),
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $this->Lexer->addExitPattern($this->getExitPattern(), $this->getModeName());
    }

    /**
     * Translate lexer events for this mode into handler calls.
     *
     * Entering emits monospace_open, exiting emits monospace_close, and
     * the unmatched body is normalized and emitted as an unformatted
     * call. Any other state is ignored.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        match ($state) {
            DOKU_LEXER_ENTER => $handler->addCall('monospace_open', [], $pos),
            DOKU_LEXER_EXIT => $handler->addCall('monospace_close', [], $pos),
            DOKU_LEXER_UNMATCHED => $handler->addCall(
                'unformatted',
                [$this->normalizeBody($match)],
                $pos
            ),
            default => true,
        };
        return true;
    }

    /**
     * GFM code-span body normalization: line endings become spaces; if
     * both ends are spaces and the body does not consist entirely of
     * spaces, strip one space from each end.
     *
     * Only the space character (U+0020) counts for the "entirely spaces"
     * test, as in the GFM spec. A body such as space-tab-space therefore
     * has its edge spaces stripped, leaving the tab.
     *
     * @param string $body raw text between the delimiters
     * @return string the normalized body
     */
    protected function normalizeBody(string $body): string
    {
        // "\r\n" is listed first so a CRLF pair collapses to ONE space.
        $body = str_replace(["\r\n", "\r", "\n"], ' ', $body);
        if (
            strlen($body) >= 2
            && $body[0] === ' '
            && $body[-1] === ' '
            // Restrict trim() to U+0020: the default character list also
            // removes tabs etc. and would misclassify " \t " as blank.
            && trim($body, ' ') !== ''
        ) {
            $body = substr($body, 1, -1);
        }
        return $body;
    }
}