xref: /plugin/combo/vendor/symfony/yaml/Unescaper.php (revision 04fd306c7c155fa133ebb3669986875d65988276)
1*04fd306cSNickeau<?php
2*04fd306cSNickeau
3*04fd306cSNickeau/*
4*04fd306cSNickeau * This file is part of the Symfony package.
5*04fd306cSNickeau *
6*04fd306cSNickeau * (c) Fabien Potencier <fabien@symfony.com>
7*04fd306cSNickeau *
8*04fd306cSNickeau * For the full copyright and license information, please view the LICENSE
9*04fd306cSNickeau * file that was distributed with this source code.
10*04fd306cSNickeau */
11*04fd306cSNickeau
12*04fd306cSNickeaunamespace Symfony\Component\Yaml;
13*04fd306cSNickeau
14*04fd306cSNickeauuse Symfony\Component\Yaml\Exception\ParseException;
15*04fd306cSNickeau
16*04fd306cSNickeau/**
17*04fd306cSNickeau * Unescaper encapsulates unescaping rules for single and double-quoted
18*04fd306cSNickeau * YAML strings.
19*04fd306cSNickeau *
20*04fd306cSNickeau * @author Matthew Lewinski <matthew@lewinski.org>
21*04fd306cSNickeau *
22*04fd306cSNickeau * @internal
23*04fd306cSNickeau */
class Unescaper
{
    /**
     * Regex fragment that matches an escaped character in a double quoted string.
     *
     * The alternation tries the fixed-width hexadecimal forms (\xHH, \uHHHH,
     * \UHHHHHHHH) first and falls back to "backslash + any single character".
     */
    public const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

    /**
     * Escape characters that map directly to a fixed replacement string,
     * keyed by the character that follows the backslash.
     */
    private const SIMPLE_ESCAPES = [
        '0' => "\x00",
        'a' => "\x07",
        'b' => "\x08",
        't' => "\t",
        "\t" => "\t",
        'n' => "\n",
        'v' => "\x0B",
        'f' => "\x0C",
        'r' => "\r",
        'e' => "\x1B",
        ' ' => ' ',
        '"' => '"',
        '/' => '/',
        '\\' => '\\',
        // U+0085 NEXT LINE
        'N' => "\xC2\x85",
        // U+00A0 NO-BREAK SPACE
        '_' => "\xC2\xA0",
        // U+2028 LINE SEPARATOR
        'L' => "\xE2\x80\xA8",
        // U+2029 PARAGRAPH SEPARATOR
        'P' => "\xE2\x80\xA9",
    ];

    /**
     * Number of hexadecimal digits expected after each numeric escape marker.
     */
    private const HEX_ESCAPE_LENGTHS = [
        'x' => 2,
        'u' => 4,
        'U' => 8,
    ];

    /**
     * Unescapes a single quoted string.
     *
     * The only escape handled is a doubled single quote, which is collapsed
     * into one single quote.
     *
     * @param string $value A single quoted string
     */
    public function unescapeSingleQuotedString(string $value): string
    {
        return str_replace("''", "'", $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every match of REGEX_ESCAPED_CHARACTER is replaced by the character it
     * denotes; all other text is returned unchanged.
     *
     * @param string $value A double quoted string
     *
     * @throws ParseException When an escape sequence is unknown or malformed,
     *                        or when PCRE cannot process the value (for
     *                        instance because it is not valid UTF-8)
     */
    public function unescapeDoubleQuotedString(string $value): string
    {
        $callback = function (array $match): string {
            return $this->unescapeCharacter($match[0]);
        };

        // evaluate the string
        $unescaped = preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);

        // preg_replace_callback() yields null on failure (e.g. malformed UTF-8
        // with the "u" modifier); report that as a parse error rather than
        // letting the string return type raise a TypeError.
        if (null === $unescaped) {
            throw new ParseException(sprintf('Unable to unescape the double-quoted string (PCRE error code %d).', preg_last_error()));
        }

        return $unescaped;
    }

    /**
     * Unescapes a character that was found in a double-quoted string.
     *
     * @param string $value An escaped character: the backslash followed by
     *                      the escape body
     *
     * @throws ParseException When the escape is unknown, or when a numeric
     *                        escape is not followed by the required number of
     *                        hexadecimal digits
     */
    private function unescapeCharacter(string $value): string
    {
        $marker = $value[1];

        if (isset(self::SIMPLE_ESCAPES[$marker])) {
            return self::SIMPLE_ESCAPES[$marker];
        }

        if (isset(self::HEX_ESCAPE_LENGTHS[$marker])) {
            $length = self::HEX_ESCAPE_LENGTHS[$marker];
            $digits = (string) substr($value, 2, $length);

            // When the digits are missing or invalid, the regex only captured
            // the marker itself (e.g. "\x"). Decoding the empty remainder
            // would silently yield a NUL byte, so such input is rejected below.
            if (\strlen($digits) === $length && strspn($digits, '0123456789abcdefABCDEF') === $length) {
                return self::utf8chr(hexdec($digits));
            }
        }

        throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
    }

    /**
     * Get the UTF-8 character for the given code point.
     *
     * The code point is first reduced modulo 0x200000, so larger values wrap
     * around instead of being rejected. No check is made for surrogates or
     * for values above U+10FFFF.
     *
     * @param int $c The code point to encode
     */
    private static function utf8chr(int $c): string
    {
        $c %= 0x200000;

        // 1 byte: 0xxxxxxx
        if ($c < 0x80) {
            return \chr($c);
        }

        // 2 bytes: 110xxxxx 10xxxxxx
        if ($c < 0x800) {
            return \chr(0xC0 | ($c >> 6))
                .\chr(0x80 | ($c & 0x3F));
        }

        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        if ($c < 0x10000) {
            return \chr(0xE0 | ($c >> 12))
                .\chr(0x80 | (($c >> 6) & 0x3F))
                .\chr(0x80 | ($c & 0x3F));
        }

        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return \chr(0xF0 | ($c >> 18))
            .\chr(0x80 | (($c >> 12) & 0x3F))
            .\chr(0x80 | (($c >> 6) & 0x3F))
            .\chr(0x80 | ($c & 0x3F));
    }
}
133