xref: /dokuwiki/_test/tests/Parsing/Helpers/EscapeTest.php (revision 74031e463764923581b9204cebc0fc3f34ce881f)
1*74031e46SAndreas Gohr<?php
2*74031e46SAndreas Gohr
3*74031e46SAndreas Gohrnamespace dokuwiki\test\Parsing\Helpers;
4*74031e46SAndreas Gohr
5*74031e46SAndreas Gohruse dokuwiki\Parsing\Helpers\Escape;
6*74031e46SAndreas Gohr
/**
 * Tests for the GFM backslash-escape post-hoc helper.
 *
 * The lexer-mode coverage is in {@see \dokuwiki\test\Parsing\ParserMode\GfmEscapeTest};
 * this class exercises the helper that GfmLink and GfmCode call on text
 * the lexer never reached.
 */
class EscapeTest extends \DokuWikiTest
{
    /**
     * Every ASCII punctuation char is escapable per GFM §6.1.
     *
     * The input is `before\<char>after`; the backslash must be dropped and
     * the punctuation character kept.
     *
     * @dataProvider provideEscapableChars
     *
     * @param string $char a single ASCII punctuation character
     */
    public function testUnescapesEscapablePunctuation(string $char): void
    {
        $this->assertSame(
            "before{$char}after",
            Escape::unescapeBackslashes("before\\{$char}after")
        );
    }

    /**
     * Supplies each ASCII punctuation character as its own data set.
     *
     * Data sets are named `char_<hex>` (hex of the character's byte) so a
     * failure report identifies the character unambiguously even when it is
     * hard to read in a terminal (quotes, backtick, backslash).
     *
     * @return array<string, array{string}>
     */
    public static function provideEscapableChars(): array
    {
        // The literal is single-quoted: \' is an apostrophe and \\ is one backslash.
        $chars = str_split('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~');
        return array_combine(
            array_map(static fn(string $c): string => 'char_' . bin2hex($c), $chars),
            array_map(static fn(string $c): array => [$c], $chars),
        );
    }

    /**
     * Backslash before any non-punctuation char stays as-is — the helper
     * must not touch it. Mirrors the lexer mode's pattern, which also
     * doesn't match these.
     *
     * The input is `x\<tail>y` and is expected to come back unchanged.
     *
     * @dataProvider provideNonEscapableTails
     *
     * @param string $tail the character placed directly after the backslash
     */
    public function testKeepsBackslashBeforeNonPunctuation(string $tail): void
    {
        $input = "x\\{$tail}y";
        $this->assertSame($input, Escape::unescapeBackslashes($input));
    }

    /**
     * Supplies characters that must not be treated as escapable: letters,
     * a digit, a multibyte letter, and whitespace.
     *
     * @return array<string, array{string}>
     */
    public static function provideNonEscapableTails(): array
    {
        return [
            'letter_upper' => ['A'],
            'letter_lower' => ['a'],
            'digit'        => ['3'],
            'multibyte'    => ['α'],
            'space'        => [' '],
            'tab'          => ["\t"],
            'newline'      => ["\n"],
        ];
    }

    /**
     * An escaped backslash yields one literal backslash and nothing more.
     */
    public function testDoubleBackslashCollapsesOnce(): void
    {
        // `\\` → `\`. The collapse is a single replacement; the surviving
        // backslash does NOT consume the next char.
        $this->assertSame('a\\*b', Escape::unescapeBackslashes('a\\\\*b'));
    }

    /**
     * Three backslashes before `*` are read as an escaped backslash followed
     * by an escaped asterisk.
     */
    public function testTripleBackslashLeavesOneEscape(): void
    {
        // `\\\*` → `\` + `*` (first pair collapses to `\`, the surviving
        // standalone `\*` then unescapes to `*` because preg_replace
        // processes all non-overlapping matches in one pass).
        $this->assertSame('a\\*b', Escape::unescapeBackslashes('a\\\\\\*b'));
    }

    /**
     * Several independent escapes in one string are all resolved by a single call.
     */
    public function testMultipleEscapesInOnePass(): void
    {
        $this->assertSame(
            '/path*with|special#chars',
            Escape::unescapeBackslashes('/path\\*with\\|special\\#chars')
        );
    }

    /**
     * Text containing no backslash at all is returned as-is.
     */
    public function testStringWithoutBackslashesIsUnchanged(): void
    {
        $this->assertSame('plain text', Escape::unescapeBackslashes('plain text'));
    }

    /**
     * The empty string is a valid input and comes back empty.
     */
    public function testEmptyStringRoundTrips(): void
    {
        $this->assertSame('', Escape::unescapeBackslashes(''));
    }

    /**
     * A backslash at the very end of the input is preserved.
     */
    public function testTrailingLoneBackslashSurvives(): void
    {
        // A backslash with nothing after it can't form an escape — it
        // stays literal.
        $this->assertSame('x\\', Escape::unescapeBackslashes('x\\'));
    }
}
104