<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\Code as CodeHelper;
use dokuwiki\Parsing\Helpers\Escape;
use dokuwiki\Parsing\Helpers\HtmlEntity;

/**
 * GFM fenced code block delimited by backtick fences: ```...```
 *
 * The mode emits the same `code` handler instruction that DokuWiki's
 * `<code>` mode emits, so renderers, indexing, and syntax highlighting
 * all reuse the existing pipeline.
 *
 * Info string: the text after the opening fence accepts DokuWiki's full
 * code-tag attribute vocabulary — language, optional filename, and
 * optional [key=value,...] highlight options — and is parsed via
 * Helpers\Code::parseAttributes. Markdown authors pasting to GitHub
 * will see the extras render as part of the language class; that
 * divergence is intentional and buys feature parity with DokuWiki's
 * <code>...</code> blocks.
 *
 * Fence placement: only column-0 fences are recognised (no indent
 * tolerance, no body dedent). A close fence is any run of 3+ fence
 * chars at column 0 followed only by trailing whitespace on that line.
 * The closer's length is not paired with the opener's, because
 * ParallelRegex does not support backreferences.
 *
 * Unclosed fences stay literal text. GFM's spec says an unclosed fence
 * runs to end of input (and to the end of any enclosing container), but
 * that rule belongs to CommonMark's two-pass block-then-inline parser,
 * where "any container boundary closes" is the uniform termination
 * rule. Our single-pass regex lexer has no notion of container
 * boundaries, so the best it could do is "close at EOF" — a partial
 * implementation that already leaks (spec example 98, a fence inside a
 * blockquote, stays red because we cannot close at the blockquote
 * boundary). Implementing a degraded version of the rule would only
 * move the broken edge case somewhere less obvious.
 *
 * Requiring a closer is also consistent with every other inline GFM
 * mode in this codebase (all of which use entry-pattern lookaheads to
 * verify that a matching closer exists) and with DokuWiki's own <code>
 * tag parsing (<code\b(?=.*</code>)>). It has the safer failure mode
 * too: a stray ``` at the top of a document stays literal text rather
 * than swallowing everything below it into a code block. Spec examples
 * 96 and 97 are listed in skip.php with this rationale.
 *
 * @see GfmFile
 */
class GfmCode extends AbstractMode
{
    /** @var string Call type passed to addCall ('code' or 'file') */
    protected $type = 'code';

    /** @var string The character fences are built from (`` ` `` or `~`). */
    protected $fenceChar = '`';

    /**
     * Regex fragment matching the info string.
     *
     * Backtick fences forbid backticks in the info string (spec example
     * 115); tilde fences allow anything except a newline (spec example
     * 116).
     */
    protected $infoClass = '[^\n`]*';

    /**
     * A fenced block is verbatim: no other mode may run inside it.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 200;
    }

    /**
     * Name of the lexer state / mode. Subclasses override it for tildes.
     */
    protected function getModeName(): string
    {
        return 'gfm_code';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        // The whole block (opener line, body, closer line) is matched as
        // one special pattern, assembled from these pieces
        // (F = fence char, INFO = info-string class):
        //
        //   fence run  \nF{3,}          line start (Parser prepends a
        //                               newline) plus 3+ fence chars at
        //                               column 0
        //   closer     <fence run>[ \t]*(?=\n)
        //                               close fence, required; there is
        //                               no `\z` fallback, so unclosed
        //                               fences stay literal
        //   opener     <fence run>INFO(?=\n)
        //                               the opener line must end at a
        //                               newline; without this anchor
        //                               `` ``` aa ``` `` on a single line
        //                               would parse as a fence
        //   body       (?:(?!<closer>).)*
        //                               any char (DOTALL) that is not the
        //                               start of a close-fence line
        $fenceRun = '\n' . $this->fenceChar . '{3,}';
        $closer = $fenceRun . '[ \t]*(?=\n)';
        $opener = $fenceRun . $this->infoClass . '(?=\n)';
        $body = '(?:(?!' . $closer . ').)*';

        $this->Lexer->addSpecialPattern(
            $opener . $body . $closer,
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $fence = $this->fenceChar;

        // Drop the newline the pattern matched in front of the opener,
        // then the opener's fence run.
        $afterOpener = ltrim(substr($match, 1), $fence);

        // Drop the close-fence run together with its trailing blanks.
        // The newline in front of the closer is not in the trim set, so
        // trimming stops there and the body keeps its final newline.
        $inner = rtrim($afterOpener, " \t" . $fence);

        // The pattern's `(?=\n)` anchor guarantees a newline after the
        // info string, so this split always produces two pieces.
        $pieces = explode("\n", $inner, 2);
        [$infoString, $code] = $pieces;

        $decodedInfo = HtmlEntity::decode($infoString);
        $attributes = CodeHelper::parseAttributes(
            Escape::unescapeBackslashes($decodedInfo)
        );
        [$language, $filename, $options] = $attributes;

        // The options entry is appended only when the helper returned one.
        $args = [$code, $language, $filename];
        if ($options !== null) {
            $args[] = $options;
        }

        $handler->addCall($this->type, $args, $pos);
        return true;
    }
}