<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers;

/**
 * Parser mode for GFM fenced code blocks delimited by backticks: ```...```
 *
 * The mode produces the very same `code` handler instruction that
 * DokuWiki's `<code>` mode produces. Renderers, indexing and syntax
 * highlighting therefore work through the existing pipeline unchanged.
 *
 * Info string
 * -----------
 * Whatever follows the opening fence is handed to
 * Helpers::parseCodeAttributes, so it understands the complete
 * attribute vocabulary of DokuWiki's code tag: a language, an optional
 * filename, and optional [key=value,...] highlight options. Markdown
 * pasted to GitHub will show those extras as part of the language
 * class; this divergence is deliberate and exists for feature parity
 * with <code>...</code> blocks.
 *
 * Fence placement
 * ---------------
 * Fences are recognised at column 0 only: there is no tolerance for
 * indentation and the body is never dedented. A closing fence is any
 * run of 3 or more fence characters at column 0 followed by nothing
 * but whitespace on that line. The closer's length is NOT matched
 * against the opener's length, because ParallelRegex has no support
 * for backreferences.
 *
 * Unclosed fences
 * ---------------
 * A fence without a closer is left as literal text. The GFM spec lets
 * an unclosed fence run to the end of input (or to the end of any
 * enclosing container), but that rule belongs to CommonMark's two-pass
 * block-then-inline parser, where "any container boundary closes" is
 * the uniform termination rule. This single-pass regex lexer knows
 * nothing about container boundaries, so the most it could offer is
 * "close at EOF" - a partial implementation that already leaks (spec
 * example 98, a fence inside a blockquote, stays red because the fence
 * cannot be closed at the blockquote boundary). A degraded version of
 * the rule would only relocate the broken edge case to a less obvious
 * place.
 *
 * Insisting on a closer also matches every other inline GFM mode in
 * this codebase (each uses an entry-pattern lookahead to confirm that
 * a matching closer exists) as well as DokuWiki's own <code> tag
 * parsing (<code\b(?=.*</code>)>). The failure mode is safer too: a
 * stray ``` at the top of a document remains literal text instead of
 * swallowing everything beneath it into a code block. Spec examples 96
 * and 97 are listed in skip.php with this rationale.
 *
 * @see GfmFile
 */
class GfmCode extends AbstractMode
{
    /** @var string Call type passed to addCall ('code' or 'file') */
    protected $type = 'code';

    /** @var string The fence character (`` ` `` or `~`). */
    protected $fenceChar = '`';

    /**
     * Regex fragment matching the info string.
     *
     * With backtick fences the info string must not contain backticks
     * (spec example 115); with tilde fences everything except a newline
     * is allowed (spec example 116).
     */
    protected $infoClass = '[^\n`]*';

    /**
     * Set up the mode with an empty list of allowed nested modes.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 200;
    }

    /**
     * Name of the lexer state / mode.
     *
     * Subclasses override this for tilde fences.
     */
    protected function getModeName(): string
    {
        return 'gfm_code';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        // The special pattern is assembled from three pieces,
        // OPEN . BODY . CLOSE, where F is the fence character and INFO
        // the info-string class:
        //
        //   OPEN  = \nF{3,}INFO(?=\n)
        //           \n      line start (Parser prepends a newline)
        //           F{3,}   3+ fence chars at column 0
        //           INFO    info string (language etc.)
        //           (?=\n)  the opener line has to end at a newline;
        //                   otherwise `` ``` aa ``` `` written on a
        //                   single line would be taken for a fence
        //   BODY  = (?:(?!CLOSE).)*
        //           any char (DOTALL) that does not begin a close-fence
        //           line
        //   CLOSE = \nF{3,}[ \t]*(?=\n)
        //           mandatory close fence. There is no `\z` fallback:
        //           unclosed fences stay literal (see class docblock)
        $fenceRun = $this->fenceChar . '{3,}';

        $close = '\n' . $fenceRun . '[ \t]*(?=\n)';
        $open = '\n' . $fenceRun . $this->infoClass . '(?=\n)';
        $body = '(?:(?!' . $close . ').)*';

        $this->Lexer->addSpecialPattern(
            $open . $body . $close,
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $fence = $this->fenceChar;

        // Peel the match from the outside in: first the leading \n
        // contributed by the pattern, then the opening fence run, and
        // finally the closing fence run plus its trailing blanks.
        $afterNewline = substr($match, 1);
        $afterOpener = ltrim($afterNewline, $fence);
        $inner = rtrim($afterOpener, " \t" . $fence);

        // The pattern's `(?=\n)` anchor forces the opener line to end at
        // a newline, so splitting on the first newline always produces
        // exactly two pieces: the info string and the code body.
        [$infoString, $code] = explode("\n", $inner, 2);

        [$lang, $file, $opts] = Helpers::parseCodeAttributes($infoString);

        // Highlight options are appended only when they were supplied.
        $args = ($opts === null)
            ? [$code, $lang, $file]
            : [$code, $lang, $file, $opts];

        $handler->addCall($this->type, $args, $pos);

        return true;
    }
}