<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\Code as CodeHelper;
use dokuwiki\Parsing\Helpers\Escape;

/**
 * GFM fenced code block with backtick fences: ```...```
 *
 * Emits the same `code` handler instruction DokuWiki's `<code>` mode
 * emits, so renderers, indexing, and syntax highlighting reuse the
 * existing pipeline.
 *
 * The info string after the opening fence accepts DokuWiki's full
 * code-tag attribute vocabulary — language, optional filename, and
 * optional [key=value,...] highlight options — parsed via
 * Helpers\Code::parseAttributes. Markdown authors pasting to GitHub
 * will see the extras render as part of the language class; the
 * divergence is intentional, for feature parity with DokuWiki's
 * <code>...</code> blocks.
 *
 * Column-0 fences only (no indent tolerance, no body dedent). The close
 * fence is any run of 3+ fence chars at column 0 with only trailing
 * whitespace on the line — the opener's length is not paired with the
 * closer's, because ParallelRegex does not support backreferences.
 *
 * Unclosed fences stay literal text. GFM's spec says an unclosed fence
 * runs to end of input (and any enclosing container's end), but that
 * rule is part of CommonMark's two-pass block-then-inline parser where
 * "any container boundary closes" is the uniform termination rule. Our
 * single-pass regex lexer has no notion of container boundaries, so the
 * best we could do is "close at EOF" — a partial implementation that
 * already leaks (spec example 98, fence inside a blockquote, stays red
 * because we can't close at the blockquote boundary). Doing a degraded
 * version of the rule just moves the broken edge case somewhere less
 * obvious.
 *
 * Requiring a closer is also consistent with every other inline GFM
 * mode in this codebase (all of which use entry-pattern lookaheads to
 * verify a matching closer exists) and with DokuWiki's own <code> tag
 * parsing (<code\b(?=.*</code>)>). And it has a safer failure mode: a
 * stray ``` at the top of a document stays as literal text rather than
 * swallowing everything below it into a code block. Spec examples 96
 * and 97 are in skip.php with this rationale.
 *
 * @see GfmFile
 */
class GfmCode extends AbstractMode
{
    /** @var string The call type used in addCall ('code' or 'file') */
    protected $type = 'code';

    /** @var string The fence character (`` ` `` or `~`). */
    protected $fenceChar = '`';

    /**
     * Info-string character class. Backtick fences forbid backticks in
     * the info string (spec example 115); tilde fences allow anything
     * except newline (spec example 116).
     */
    protected $infoClass = '[^\n`]*';

    /**
     * Initialise the mode with an empty list of allowed modes.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 200;
    }

    /** The lexer state / mode name. Subclasses override for tildes. */
    protected function getModeName(): string
    {
        return 'gfm_code';
    }

    /**
     * Register the fenced-block pattern as a special pattern on the lexer.
     *
     * The whole block (opener line, body, and close fence) is matched by a
     * single pattern, so handle() receives it in one piece.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // The pattern is assembled from three fragments
        // (F = fence char, INFO = info-string class):
        //
        //   close fence:  \nF{3,}[ \t]*(?=\n)
        //       A run of 3+ fence chars at column 0, followed only by
        //       blanks up to the line end. It is mandatory; there is no
        //       `\z` alternative, so an unclosed fence is never matched
        //       and stays literal text.
        //
        //   opener line:  \nF{3,}INFO(?=\n)
        //       The leading \n anchors at line start (Parser prepends a
        //       newline). The trailing (?=\n) forces the opener line to
        //       end at a newline; without it `` ``` aa ``` `` on a single
        //       line would be taken for a fence.
        //
        //   body:         (?:(?!CLOSE).)*
        //       Any character (DOTALL) that does not begin a close-fence
        //       line.
        $closeFence = '\n' . $this->fenceChar . '{3,}[ \t]*(?=\n)';
        $openerLine = '\n' . $this->fenceChar . '{3,}' . $this->infoClass . '(?=\n)';
        $bodyChars = '(?:(?!' . $closeFence . ').)*';

        $this->Lexer->addSpecialPattern(
            $openerLine . $bodyChars . $closeFence,
            $mode,
            $this->getModeName()
        );
    }

    /**
     * Turn a matched fenced block into a handler call of type $this->type.
     *
     * @param string  $match   The full text matched by the pattern from connectTo()
     * @param int     $state   The lexer state (not used here)
     * @param int     $pos     Position of the match, passed on to the handler call
     * @param Handler $handler Receives the resulting call
     * @return bool Always true
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $fence = $this->fenceChar;

        // Peel the wrapper off in three steps: the "\n" the pattern
        // consumed in front of the opener, the opening fence run, and
        // finally the closing fence run together with any blanks after it.
        $inner = substr($match, 1);
        $inner = ltrim($inner, $fence);
        $inner = rtrim($inner, " \t" . $fence);

        // What remains is "<info>\n<body>". The pattern's `(?=\n)` anchor
        // guarantees the opener line ended at a newline, so splitting on
        // the first newline always yields two pieces.
        $pieces = explode("\n", $inner, 2);
        $info = $pieces[0];
        $body = $pieces[1];

        $attributes = CodeHelper::parseAttributes(Escape::unescapeBackslashes($info));
        [$language, $filename, $options] = $attributes;

        // The options entry is appended only when there is one.
        $param = [$body, $language, $filename];
        if ($options !== null) {
            $param[] = $options;
        }

        $handler->addCall($this->type, $param, $pos);
        return true;
    }
}