` mode
* emits, so renderers, indexing, and syntax highlighting reuse the
* existing pipeline.
*
* The info string after the opening fence accepts DokuWiki's full
* code-tag attribute vocabulary — language, optional filename, and
* optional [key=value,...] highlight options — parsed via
* Helpers\Code::parseAttributes. Markdown authors pasting to GitHub
* will see the extras render as part of the language class; the
* divergence is intentional, for feature parity with DokuWiki's
* native code-tag blocks.
*
* Column-0 fences only (no indent tolerance, no body dedent). The close
* fence is any run of 3+ fence chars at column 0 with only trailing
* whitespace on the line — the opener's length is not paired with the
* closer's, because ParallelRegex does not support backreferences.
*
* Unclosed fences stay literal text. GFM's spec says an unclosed fence
* runs to end of input (and any enclosing container's end), but that
* rule is part of CommonMark's two-pass block-then-inline parser where
* "any container boundary closes" is the uniform termination rule. Our
* single-pass regex lexer has no notion of container boundaries, so the
* best we could do is "close at EOF" — a partial implementation that
* already leaks (spec example 98, fence inside a blockquote, stays red
* because we can't close at the blockquote boundary). Doing a degraded
* version of the rule just moves the broken edge case somewhere less
* obvious.
*
* Requiring a closer is also consistent with every other inline GFM
* mode in this codebase (all of which use entry-pattern lookaheads to
* verify a matching closer exists) and with DokuWiki's own tag
* parsing (an opening code tag is only recognised when its closing tag
* follows later in the text). And it has a safer failure mode: a
* stray ``` at the top of a document stays as literal text rather than
* swallowing everything below it into a code block. Spec examples 96
* and 97 are in skip.php with this rationale.
*
* @see GfmFile
*/
class GfmCode extends AbstractMode
{
    /** @var string Call name passed to addCall() in handle(): 'code' or 'file'. */
    protected $type = 'code';

    /** @var string Character the fence is built from (`` ` `` or `~`). */
    protected $fenceChar = '`';

    /**
     * @var string Regex fragment matching the info string that follows the
     *             opening fence. With backtick fences the info string must
     *             not contain a backtick (spec example 115); tilde fences
     *             accept anything up to the newline (spec example 116).
     */
    protected $infoClass = '[^\n`]*';

    /**
     * Start with an empty list of allowed modes.
     */
    public function __construct()
    {
        $this->allowedModes = [];
    }

    /** @inheritdoc */
    public function getSort()
    {
        return 200;
    }

    /**
     * Name under which the pattern is registered with the lexer.
     * Subclasses override this for the tilde variant.
     */
    protected function getModeName(): string
    {
        return 'gfm_code';
    }

    /**
     * Register one special pattern that matches a complete fenced block:
     * opener line, body, and closer line.
     *
     * @inheritdoc
     */
    public function connectTo($mode)
    {
        // Pieces of the pattern, in the order they are concatenated:
        //
        //   opener  \n F{3,} INFO (?=\n)
        //           A leading \n marks the line start (Parser prepends a
        //           newline), followed by three or more fence characters
        //           at column 0 and the info string. The lookahead makes
        //           the opener line end at a newline, so a one-line
        //           `` ``` aa ``` `` is not taken for a fence.
        //
        //   body    (?:(?!CLOSER).)*
        //           Any character (DOTALL) that does not begin a closer line.
        //
        //   closer  \n F{3,} [ \t]* (?=\n)
        //           Mandatory. There is deliberately no `\z` alternative:
        //           a fence without a closer stays literal text (see the
        //           class docblock).
        $fenceRun = $this->fenceChar . '{3,}';
        $closer   = '\n' . $fenceRun . '[ \t]*(?=\n)';
        $opener   = '\n' . $fenceRun . $this->infoClass . '(?=\n)';
        $body     = '(?:(?!' . $closer . ').)*';

        $this->Lexer->addSpecialPattern(
            $opener . $body . $closer,
            $mode,
            $this->getModeName()
        );
    }

    /**
     * Split a matched fence into info string and body, then emit a single
     * call of type $this->type.
     *
     * The call parameters are [body, language, filename], with the parsed
     * options appended as a fourth element only when they are not null.
     *
     * @inheritdoc
     */
    public function handle($match, $state, $pos, Handler $handler)
    {
        $fence = $this->fenceChar;

        // Peel the match from the outside in: first the \n the pattern
        // starts with, then the opener's fence run on the left, and finally
        // the closer's fence run plus its trailing blanks on the right.
        $afterNewline = substr($match, 1);
        $afterOpener  = ltrim($afterNewline, $fence);
        $payload      = rtrim($afterOpener, " \t" . $fence);

        // The pattern's `(?=\n)` anchor guarantees a newline after the
        // opener line, so limiting the split to two pieces always yields
        // both the info string and the body.
        [$infoString, $content] = explode("\n", $payload, 2);

        $attributes = CodeHelper::parseAttributes(Escape::unescapeBackslashes($infoString));
        [$language, $filename, $options] = $attributes;

        $callArgs = [$content, $language, $filename];
        if ($options !== null) {
            $callArgs[] = $options;
        }

        $handler->addCall($this->type, $callArgs, $pos);

        return true;
    }
}