<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;

/**
 * GFM inline code span bounded by single backticks: `text`.
 *
 * A backtick span is both monospace-formatted and verbatim: the content
 * is wrapped in monospace_open / monospace_close (the same instructions
 * as DokuWiki's doubled-single-quote pair, rendered as an HTML <code>
 * element) and the body is emitted through the unformatted handler
 * rather than plain cdata, so renderers that distinguish the two
 * (metadata, indexer, non-XHTML backends) treat it as literal.
 *
 * The entry pattern's lookahead only verifies three things: an opener,
 * at least one body character, and a valid closer. It does NOT enforce
 * non-whitespace body edges or a non-whitespace body interior. GFM's
 * edge rules are applied in handle() after the body has been extracted:
 *
 *   1. Line endings become single spaces.
 *   2. If the body both starts and ends with a space, and does not
 *      consist entirely of space characters, one space is stripped
 *      from each end.
 *
 * This lets the regex stay small while still producing GFM-correct
 * output for the tricky cases:
 *
 *   ` `        -> <code> </code>   (all-space body, no strip)
 *   ` a`       -> <code> a</code>  (asymmetric edge, no strip)
 *   ` `` `     -> <code>``</code>  (run of 2 inside body, strip)
 *
 * Runs of two or more backticks on either delimiter are rejected by
 * the length-boundary guards (?<!`)...(?!`), so this mode never steals
 * input from GfmBacktickDouble. GfmBacktickDouble extends this class
 * to reuse handle() and normalizeBody().
 *
 * No other inline parsing runs inside a span; allowedModes is empty.
 *
 * @see GfmBacktickDouble
 */
class GfmBacktickSingle extends AbstractMode
{
    /** @inheritdoc */
    public function getSort()
    {
        return 165;
    }

    /**
     * The lexer state / mode name. Subclasses override for n≥2.
     *
     * @return string name used for both the entry and the exit pattern registration
     */
    protected function getModeName(): string
    {
        return 'gfm_backtick_single';
    }

    /**
     * Entry pattern. The length-boundary guards (?<!`)...(?!`) around
     * each delimiter ensure a run of two or more backticks is never read
     * as an n=1 opener or closer. The body character class, which admits
     * either a non-backtick or a run of two-or-more backticks, lets
     * those longer runs live inside the body since they cannot be valid
     * n=1 closers.
     *
     * Each body step is prefixed with NOT_AT_PARA_BREAK, a constant that is
     * not defined in this class (presumably inherited from AbstractMode and,
     * going by its name, meant to stop the lookahead at a paragraph break —
     * verify against the parent class).
     *
     * @return string regex fragment: the opener followed by a lookahead for body and closer
     */
    protected function getEntryPattern(): string
    {
        return '(?<!`)`(?!`)(?='
            . '(?:' . self::NOT_AT_PARA_BREAK . '(?:[^`]|``+))+'
            . '(?<!`)`(?!`)'
            . ')';
    }

    /**
     * Exit pattern. Same boundary guards as the entry.
     *
     * @return string regex fragment matching a lone backtick
     */
    protected function getExitPattern(): string
    {
        return '(?<!`)`(?!`)';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        $this->Lexer->addEntryPattern(
            $this->getEntryPattern(),
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function postConnect()
    {
        $this->Lexer->addExitPattern($this->getExitPattern(), $this->getModeName());
    }

    /**
     * Translate lexer events for this mode into handler calls.
     *
     * ENTER and EXIT emit monospace_open / monospace_close; UNMATCHED (the
     * span body) is normalized and emitted as 'unformatted'. Any other
     * state is ignored.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        match ($state) {
            DOKU_LEXER_ENTER => $handler->addCall('monospace_open', [], $pos),
            DOKU_LEXER_EXIT => $handler->addCall('monospace_close', [], $pos),
            DOKU_LEXER_UNMATCHED => $handler->addCall(
                'unformatted',
                [$this->normalizeBody($match)],
                $pos
            ),
            default => true,
        };
        return true;
    }

    /**
     * GFM code-span body normalization: line endings become spaces; if
     * both ends are spaces and the body does not consist entirely of
     * space characters, strip one space from each end.
     *
     * @param string $body raw text found between the delimiters
     * @return string the normalized body
     */
    protected function normalizeBody(string $body): string
    {
        // "\r\n" is listed first so a CRLF pair collapses to one space, not two.
        $body = str_replace(["\r\n", "\r", "\n"], ' ', $body);

        // A body shorter than two bytes cannot carry a space on each end;
        // this also keeps the offset reads below off an empty string.
        if (strlen($body) < 2) {
            return $body;
        }

        $paddedBothEnds = $body[0] === ' ' && $body[-1] === ' ';

        // Only U+0020 counts for the "entirely spaces" test. Default trim()
        // would also swallow tabs, NUL and vertical tab, so a body such as
        // " \t " would wrongly be treated as all-space and keep its padding.
        $onlySpaces = trim($body, ' ') === '';

        if ($paddedBothEnds && !$onlySpaces) {
            return substr($body, 1, -1);
        }

        return $body;
    }
}