<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers;

/**
 * GFM fenced code block with backtick fences: ```...```
 *
 * Emits the same `code` handler instruction DokuWiki's `<code>` mode
 * emits, so renderers, indexing, and syntax highlighting reuse the
 * existing pipeline.
 *
 * The info string after the opening fence accepts DokuWiki's full
 * code-tag attribute vocabulary — language, optional filename, and
 * optional [key=value,...] highlight options — parsed via
 * Helpers::parseCodeAttributes. Markdown authors pasting to GitHub
 * will see the extras render as part of the language class; the
 * divergence is intentional, for feature parity with DokuWiki's
 * <code>...</code> blocks.
 *
 * Column-0 fences only (no indent tolerance, no body dedent). The close
 * fence is any run of 3+ fence chars at column 0 with only trailing
 * whitespace on the line — the opener's length is not paired with the
 * closer's, because ParallelRegex does not support backreferences.
 *
 * Unclosed fences stay literal text. GFM's spec says an unclosed fence
 * runs to end of input (and any enclosing container's end), but that
 * rule is part of CommonMark's two-pass block-then-inline parser where
 * "any container boundary closes" is the uniform termination rule. Our
 * single-pass regex lexer has no notion of container boundaries, so the
 * best we could do is "close at EOF" — a partial implementation that
 * already leaks (spec example 98, fence inside a blockquote, stays red
 * because we can't close at the blockquote boundary). Doing a degraded
 * version of the rule just moves the broken edge case somewhere less
 * obvious.
 *
 * Requiring a closer is also consistent with every other inline GFM
 * mode in this codebase (all of which use entry-pattern lookaheads to
 * verify a matching closer exists) and with DokuWiki's own <code> tag
 * parsing (<code\b(?=.*</code>)>). And it has a safer failure mode: a
 * stray ``` at the top of a document stays as literal text rather than
 * swallowing everything below it into a code block. Spec examples 96
 * and 97 are in skip.php with this rationale.
 *
 * @see GfmFile
 */
class GfmCode extends AbstractMode
{
    /** @var string The call type used in addCall ('code' or 'file') */
    protected $type = 'code';

    /** @var string The fence character (`` ` `` or `~`). */
    protected $fenceChar = '`';

    /**
     * Info-string character class. Backtick fences forbid backticks in
     * the info string (spec example 115); tilde fences allow anything
     * except newline (spec example 116).
     *
     * @var string regex fragment, concatenated verbatim into the entry pattern
     */
    protected $infoClass = '[^\n`]*';

    /**
     * A fenced block is a leaf: no other mode may start inside it.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 200;
    }

    /** The lexer state / mode name. Subclasses override for tildes. */
    protected function getModeName(): string
    {
        return 'gfm_code';
    }

    /**
     * Register the single special pattern that matches a complete fenced
     * block (opener line, body, and close fence) in one go.
     *
     * @inheritdoc
     * @param string $mode mode name handed through to Lexer::addSpecialPattern
     */
    public function connectTo($mode)
    {
        // Entry pattern breakdown (F = fence char, INFO = info-string class):
        //   \n               — line start (Parser prepends a newline)
        //   F{3,}            — opener: 3+ fence chars at column 0
        //   INFO             — info-string (language etc.)
        //   (?=\n)           — opener line must end at a newline;
        //                      without this anchor `` ``` aa ``` ``
        //                      on one line would parse as a fence
        //   (?:(?!CLOSE).)*  — body: any char (DOTALL) that isn't
        //                      the start of a close-fence line
        //   CLOSE = \nF{3,}[ \t]*(?=\n) — close fence, required.
        //                      No `\z` fallback: unclosed fences stay
        //                      literal (see class docblock)
        $close = '\n' . $this->fenceChar . '{3,}[ \t]*(?=\n)';
        $this->Lexer->addSpecialPattern(
            '\n' . $this->fenceChar . '{3,}' . $this->infoClass . '(?=\n)'
            . '(?:(?!' . $close . ').)*' . $close,
            $mode,
            $this->getModeName()
        );
    }

    /**
     * Turn one matched fenced block into a `code`/`file` handler call.
     *
     * @inheritdoc
     * @param string $match the complete text matched by the entry pattern
     * @param int $state lexer state (not consulted here)
     * @param int $pos position of the match, forwarded to the handler call
     * @param Handler $handler receives the resulting call
     * @return bool always true
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $c = $this->fenceChar;

        // Shed the pattern's leading \n, the opener fence run, and the
        // close-fence run with its trailing whitespace. Both trims stop at
        // a newline, so only the fence lines themselves are affected and the
        // newline preceding the close fence remains on the body.
        $text = rtrim(ltrim(substr($match, 1), $c), " \t" . $c);

        // The opener ended at a newline (required by the pattern's `(?=\n)`
        // anchor), so an explode split always has two parts.
        [$info, $body] = explode("\n", $text, 2);

        [$language, $filename, $options] = Helpers::parseCodeAttributes($info);

        // Options are only appended when present, so the call keeps its
        // three-element shape otherwise.
        $param = [$body, $language, $filename];
        if ($options !== null) $param[] = $options;
        $handler->addCall($this->type, $param, $pos);
        return true;
    }
}