<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;

/**
 * GFM inline code span bounded by single backticks: `text`.
 *
 * A backtick span is both monospace-formatted and verbatim: the content
 * is wrapped in monospace_open / monospace_close (the same instructions
 * as DokuWiki's doubled-single-quote pair, rendered as an HTML <code>
 * element) and the body is emitted through the unformatted handler
 * rather than plain cdata, so renderers that distinguish the two
 * (metadata, indexer, non-XHTML backends) treat it as literal.
 *
 * The entry pattern's lookahead only verifies three things: an opener,
 * at least one body character, and a valid closer. It does NOT enforce
 * non-space body edges or a non-space body interior. GFM's edge rules
 * are applied in handle() after the body has been extracted:
 *
 *   1. Line endings become single spaces.
 *   2. If the body both starts and ends with a space, and does not
 *      consist entirely of spaces, one space is stripped from each end.
 *
 * This lets the regex stay small while still producing GFM-correct
 * output for the tricky cases:
 *
 *   ` `      -> <code> </code>    (all-space body, no strip)
 *   ` a`     -> <code> a</code>   (asymmetric edge, no strip)
 *   ` `` `   -> <code>``</code>   (run of 2 inside body, strip)
 *
 * Runs of two or more backticks on either delimiter are rejected by
 * the length-boundary guards (?<!`)...(?!`), so this mode never steals
 * input from GfmBacktickDouble. GfmBacktickDouble extends this class
 * to reuse handle() and normalizeBody().
 *
 * No other inline parsing runs inside a span; allowedModes is empty.
 *
 * @see GfmBacktickDouble
 */
class GfmBacktickSingle extends AbstractMode
{
    public function __construct()
    {
        // Content is literal — no nested inline parsing.
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 165;
    }

    /** The lexer state / mode name. Subclasses override for n≥2. */
    protected function getModeName(): string
    {
        return 'gfm_backtick_single';
    }

    /**
     * Entry pattern. The length-boundary guards (?<!`)...(?!`) around
     * each delimiter ensure a run of two or more backticks is never read
     * as an n=1 opener or closer. The body alternation, which admits
     * either a non-backtick or a run of two-or-more backticks, lets
     * those longer runs live inside the body since they cannot be valid
     * n=1 closers.
     *
     * The run alternative is possessive (``++): a backtick run is always
     * consumed as a whole. This accepts exactly the same inputs as a
     * greedy ``+ (no backtick inside a run of two or more can pass the
     * closer's boundary guards), but it stops the regex engine from
     * trying every way of splitting a long run when no closer follows,
     * which would otherwise backtrack exponentially.
     *
     * @return string regex fragment registered as the lexer entry pattern
     */
    protected function getEntryPattern(): string
    {
        return '(?<!`)`(?!`)(?='
            . '(?:' . self::NOT_AT_PARA_BREAK . '(?:[^`]|``++))+'
            . '(?<!`)`(?!`)'
            . ')';
    }

    /**
     * Exit pattern. Same boundary guards as the entry.
     *
     * @return string regex fragment registered as the lexer exit pattern
     */
    protected function getExitPattern(): string
    {
        return '(?<!`)`(?!`)';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        $this->Lexer->addEntryPattern(
            $this->getEntryPattern(),
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $this->Lexer->addExitPattern($this->getExitPattern(), $this->getModeName());
    }

    /**
     * Translate lexer tokens of this mode into handler instructions.
     *
     * Entering the mode opens a monospace section, leaving it closes the
     * section, and the text in between is emitted as a single
     * 'unformatted' call after GFM body normalization. Any other lexer
     * state is ignored.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        match ($state) {
            DOKU_LEXER_ENTER => $handler->addCall('monospace_open', [], $pos),
            DOKU_LEXER_EXIT => $handler->addCall('monospace_close', [], $pos),
            DOKU_LEXER_UNMATCHED => $handler->addCall(
                'unformatted',
                [$this->normalizeBody($match)],
                $pos
            ),
            default => true,
        };
        return true;
    }

    /**
     * GFM code-span body normalization: line endings become spaces; if
     * both ends are spaces and the body does not consist entirely of
     * spaces, strip one space from each end.
     *
     * Only U+0020 counts for the "entirely spaces" test, as in the GFM
     * spec. A body such as " \t " therefore loses its edge spaces, while
     * "  " (spaces only) is kept untouched.
     *
     * @param string $body raw text between the delimiters
     * @return string normalized text to hand to the renderer
     */
    protected function normalizeBody(string $body): string
    {
        $body = str_replace(["\r\n", "\r", "\n"], ' ', $body);

        // A body that passes all three checks is at least three bytes long
        // (space, non-space, space), so the substr() below cannot underflow.
        $hasSpaceEdges = str_starts_with($body, ' ') && str_ends_with($body, ' ');
        if ($hasSpaceEdges && trim($body, ' ') !== '') {
            $body = substr($body, 1, -1);
        }

        return $body;
    }
}