xref: /dokuwiki/inc/Parsing/Helpers/Escape.php (revision 74031e463764923581b9204cebc0fc3f34ce881f)
1*74031e46SAndreas Gohr<?php
2*74031e46SAndreas Gohr
3*74031e46SAndreas Gohrnamespace dokuwiki\Parsing\Helpers;
4*74031e46SAndreas Gohr
/**
 * Pure helper for applying GFM backslash-escape rules to literal text
 * that didn't pass through the GfmEscape lexer mode.
 *
 * Whole-span PROTECTED modes (GfmCode, GfmLink, …) capture their body
 * in a single regex match, so the inline GfmEscape pattern never gets
 * to the bytes inside. For the slots GFM still wants escaped — fenced
 * code info strings, link destinations, link titles — call this helper
 * after extracting the literal substring.
 */
class Escape
{
    /**
     * Regex character class matching every GFM §6.1 escapable ASCII
     * punctuation char. Shared by GfmEscape's lexer pattern and
     * unescapeBackslashes() so the two stay in lockstep.
     *
     * The encoding looks busy because of nested PHP-string + PCRE
     * escaping: the embedded `\\\\\]` produces the regex `\\\]`,
     * i.e. a literal `\` and a literal `]` inside the char class.
     *
     * Keep `{` listed before `}`: unescapeBackslashes() wraps this class
     * in `{...}` pattern delimiters and relies on the pair being balanced.
     */
    public const PUNCTUATION_CHAR_CLASS = '[!"#$%&\'()*+,\-./:;<=>?@\[\\\\\]^_`{|}~]';

    /**
     * Replace each `\X` (where X is GFM-escapable ASCII punctuation)
     * with the literal X.
     *
     * A backslash followed by any other character is left untouched.
     * Matching is left-to-right and non-overlapping, so `\\*` (escaped
     * backslash followed by a star) becomes `\*`.
     *
     * @param string $text literal text that may contain backslash escapes
     * @return string the text with escapes resolved; the unmodified input
     *                if PCRE reports a runtime error
     */
    public static function unescapeBackslashes(string $text): string
    {
        // Paired `{...}` delimiters: PHP single-char delimiters (`/`, `~`,
        // `#`) appearing inside the regex terminate it early. Every char
        // we'd want as a delimiter is in our escapable class, so we use
        // the paired form. PHP's delimiter scanner counts nesting of
        // unescaped `{` / `}`, and the class contains them as a balanced
        // `{|}` pair, so only the final `}` closes the pattern.
        $unescaped = preg_replace(
            '{\\\\(' . self::PUNCTUATION_CHAR_CLASS . ')}',
            '$1',
            $text
        );

        // preg_replace() returns null on a PCRE runtime error; returning
        // that from a `: string` method would raise a TypeError. Fall back
        // to the input so the caller still gets usable (if still escaped)
        // text.
        return $unescaped ?? $text;
    }
}
46