<?php

namespace dokuwiki\Parsing\ParserMode;

use dokuwiki\Parsing\Handler;
use dokuwiki\Parsing\Helpers\Code as CodeHelper;
use dokuwiki\Parsing\Helpers\Escape;
use dokuwiki\Parsing\Helpers\HtmlEntity;

/**
 * Parser mode for GFM fenced code blocks delimited by backticks: ```...```
 *
 * The mode emits the very same `code` handler instruction that
 * DokuWiki's `<code>` mode emits, which lets renderers, indexing, and
 * syntax highlighting go through the existing pipeline unchanged.
 *
 * Info string
 * -----------
 * Whatever follows the opening fence is interpreted with DokuWiki's
 * full code-tag attribute vocabulary — a language, an optional
 * filename, and optional [key=value,...] highlight options — by way of
 * Helpers\Code::parseAttributes. Markdown authors who paste the same
 * text to GitHub will see the extras rendered as part of the language
 * class; this divergence is intentional and buys feature parity with
 * DokuWiki's <code>...</code> blocks.
 *
 * Fence placement
 * ---------------
 * Only fences at column 0 are recognised (no indent tolerance, no body
 * dedent). Any run of 3+ fence chars at column 0, followed by nothing
 * but trailing whitespace on that line, closes the block. The length of
 * the closer is not matched against the length of the opener, because
 * ParallelRegex does not support backreferences.
 *
 * Unclosed fences
 * ---------------
 * A fence without a closer stays literal text. The GFM spec says an
 * unclosed fence runs to the end of input (and to the end of any
 * enclosing container), but that rule belongs to CommonMark's two-pass
 * block-then-inline parser, where "any container boundary closes" is
 * the uniform termination rule. This single-pass regex lexer has no
 * notion of container boundaries, so the most it could offer is "close
 * at EOF" — a partial implementation that already leaks (spec example
 * 98, a fence inside a blockquote, stays red because the block cannot
 * be closed at the blockquote boundary). A degraded version of the rule
 * would only move the broken edge case somewhere less obvious.
 *
 * Insisting on a closer is also in line with every other inline GFM
 * mode in this codebase (all of which use entry-pattern lookaheads to
 * verify that a matching closer exists) and with DokuWiki's own <code>
 * tag parsing (<code\b(?=.*</code>)>). Its failure mode is safer too: a
 * stray ``` at the top of a document remains literal text instead of
 * swallowing everything below it into a code block. Spec examples 96
 * and 97 are listed in skip.php with this rationale.
 *
 * @see GfmFile
 */
class GfmCode extends AbstractMode
{
    /** @var string Call type handed to addCall ('code' or 'file') */
    protected $type = 'code';

    /** @var string The character fences are made of (`` ` `` or `~`). */
    protected $fenceChar = '`';

    /**
     * Regex fragment matching the info string.
     *
     * For backtick fences the info string must not contain backticks
     * (spec example 115); for tilde fences anything except a newline is
     * allowed (spec example 116).
     */
    protected $infoClass = '[^\n`]*';

    /** @inheritdoc */
    public function getSort()
    {
        return 200;
    }

    /** Name of the lexer state / mode. Overridden by subclasses for tildes. */
    protected function getModeName(): string
    {
        return 'gfm_code';
    }

    /** @inheritdoc */
    public function connectTo($mode)
    {
        // The whole block (opener line, body, closer line) is matched by
        // a single special pattern assembled from the pieces below.
        // F stands for the fence char, INFO for the info-string class.

        // F{3,} — a run of three or more fence chars.
        $fenceRun = $this->fenceChar . '{3,}';

        // \nF{3,}[ \t]*(?=\n) — the close fence: a fence run at column 0
        // with only blanks up to the end of the line. It is mandatory;
        // there is deliberately no `\z` fallback, so an unclosed fence
        // does not match and stays literal text.
        $closer = '\n' . $fenceRun . '[ \t]*(?=\n)';

        // \nF{3,}INFO(?=\n) — the opener: line start (Parser prepends a
        // newline), the fence run at column 0, then the info string
        // (language etc.). The opener line has to end at a newline;
        // without that anchor `` ``` aa ``` `` written on one line
        // would parse as a fence.
        $opener = '\n' . $fenceRun . $this->infoClass . '(?=\n)';

        // (?:(?!CLOSE).)* — the body: any char (DOTALL) that is not the
        // start of a close-fence line.
        $body = '(?:(?!' . $closer . ').)*';

        $this->Lexer->addSpecialPattern(
            $opener . $body . $closer,
            $mode,
            $this->getModeName()
        );
    }

    /** @inheritdoc */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $fence = $this->fenceChar;

        // Peel the match from the outside in:
        //  1. drop the leading \n consumed by the pattern,
        //  2. drop the opener's fence run,
        //  3. drop the closer's fence run plus its trailing blanks.
        // The closer always sits right after a newline, and newline is
        // not among the trimmed characters, so step 3 stops there.
        $inner = substr($match, 1);
        $inner = ltrim($inner, $fence);
        $inner = rtrim($inner, " \t" . $fence);

        // The pattern's `(?=\n)` anchor guarantees the opener line ends
        // at a newline, hence the split always yields two pieces: the
        // info string and the block content.
        [$infoString, $content] = explode("\n", $inner, 2);

        $decodedInfo = Escape::unescapeBackslashes(HtmlEntity::decode($infoString));
        [$lang, $file, $highlightOptions] = CodeHelper::parseAttributes($decodedInfo);

        // Highlight options are appended only when they are present.
        $callArgs = [$content, $lang, $file];
        if ($highlightOptions !== null) {
            $callArgs[] = $highlightOptions;
        }

        $handler->addCall($this->type, $callArgs, $pos);

        return true;
    }
}