xref: /dokuwiki/inc/Parsing/Helpers/Escape.php (revision 95f694202286c1add4c442936a5caa38db0dd603)
1<?php
2
3namespace dokuwiki\Parsing\Helpers;
4
5/**
6 * Pure helper for applying GFM backslash-escape rules to literal text
7 * that didn't pass through the GfmEscape lexer mode.
8 *
9 * Whole-span PROTECTED modes (GfmCode, GfmLink, …) capture their body
10 * in a single regex match, so the inline GfmEscape pattern never gets
11 * to the bytes inside. For the slots GFM still wants escaped — fenced
12 * code info strings, link destinations, link titles — call this helper
13 * after extracting the literal substring.
14 */
class Escape
{
    /**
     * Regex character class matching every GFM §6.1 escapable ASCII
     * punctuation char. Shared by GfmEscape's lexer pattern and
     * unescapeBackslashes() so the two stay in lockstep.
     *
     * The encoding looks busy because of nested PHP-string + PCRE
     * escaping: the embedded `\\\\\]` produces the regex `\\\]`,
     * i.e. a literal `\` and a literal `]` inside the char class.
     */
    public const PUNCTUATION_CHAR_CLASS = '[!"#$%&\'()*+,\-./:;<=>?@\[\\\\\]^_`{|}~]';

    /**
     * Replace each `\X` (where X is GFM-escapable ASCII punctuation)
     * with the literal X.
     *
     * A backslash followed by any other character, or a trailing lone
     * backslash, is left untouched. Matching runs left to right without
     * overlap, so `\\*` becomes `\*` (the first pair collapses to one
     * backslash, which is not re-examined).
     *
     * @param string $text Literal text that may contain backslash escapes
     * @return string The text with escapes resolved; the unmodified input
     *                if the regex engine reports a failure
     */
    public static function unescapeBackslashes(string $text): string
    {
        // Paired `{...}` delimiters: every single-char delimiter we could
        // pick (`/`, `~`, `#`, ...) is itself in the escapable class and
        // would terminate the pattern early. With the paired form PHP finds
        // the closing `}` by counting nested `{`/`}` pairs (skipping
        // backslash-escaped chars), so the balanced `{|}` inside the
        // character class does not end the pattern prematurely.
        $unescaped = preg_replace(
            '{\\\\(' . self::PUNCTUATION_CHAR_CLASS . ')}',
            '$1',
            $text
        );

        // preg_replace() yields null on a PCRE error; returning that would
        // violate the declared string return type, so fall back to the
        // input as-is.
        return $unescaped ?? $text;
    }
}
46