1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\Yaml;
13
14use Symfony\Component\Yaml\Exception\ParseException;
15
16/**
17 * Unescaper encapsulates unescaping rules for single and double-quoted
18 * YAML strings.
19 *
20 * @author Matthew Lewinski <matthew@lewinski.org>
21 *
22 * @internal
23 */
class Unescaper
{
    /**
     * Regex fragment that matches an escaped character in a double quoted string.
     *
     * It matches a backslash followed by either "x" and 2 hex digits, "u" and
     * 4 hex digits, "U" and 8 hex digits, or any single character matched by ".".
     * The fragment carries no delimiters or modifiers; callers add their own.
     */
    public const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

    /**
     * Unescapes a single quoted string.
     *
     * The only escape handled is the doubled single quote (''), which is
     * replaced by one single quote.
     *
     * @param string $value A single quoted string
     *
     * @return string The unescaped string
     */
    public function unescapeSingleQuotedString(string $value): string
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
     * character it denotes.
     *
     * @param string $value A double quoted string
     *
     * @return string The unescaped string
     *
     * @throws ParseException When an unknown escape sequence is found, or when
     *                        the PCRE engine fails to process the value (for
     *                        instance because it is not valid UTF-8)
     */
    public function unescapeDoubleQuotedString(string $value): string
    {
        $callback = function ($match) {
            return $this->unescapeCharacter($match[0]);
        };

        // evaluate the string
        $result = preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);

        // preg_replace_callback() returns null on failure; returning that as-is
        // would violate the declared return type and raise a TypeError.
        if (null === $result) {
            $errorCode = preg_last_error();

            if (\PREG_BAD_UTF8_ERROR === $errorCode) {
                throw new ParseException('The YAML value does not appear to be valid UTF-8.');
            }

            throw new ParseException(sprintf('Unable to unescape the double quoted string (PCRE error code %d).', $errorCode));
        }

        return $result;
    }

    /**
     * Unescapes a character that was found in a double-quoted string.
     *
     * @param string $value An escaped character, including its leading backslash
     *
     * @return string The unescaped character
     *
     * @throws ParseException When the character following the backslash is not
     *                        a known escape
     */
    private function unescapeCharacter(string $value): string
    {
        // $value[0] is the backslash; the byte after it selects the escape.
        switch ($value[1]) {
            case '0':
                return "\x0";
            case 'a':
                return "\x7";
            case 'b':
                return "\x8";
            case 't':
            case "\t":
                // both "\t" and a backslash followed by a literal tab yield a tab
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\xB";
            case 'f':
                return "\xC";
            case 'r':
                return "\r";
            case 'e':
                return "\x1B";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return "\xC2\x85";
            case '_':
                // U+00A0 NO-BREAK SPACE
                return "\xC2\xA0";
            case 'L':
                // U+2028 LINE SEPARATOR
                return "\xE2\x80\xA8";
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return "\xE2\x80\xA9";
            case 'x':
                // 2 hex digits
                return self::utf8chr(hexdec(substr($value, 2, 2)));
            case 'u':
                // 4 hex digits
                return self::utf8chr(hexdec(substr($value, 2, 4)));
            case 'U':
                // 8 hex digits
                return self::utf8chr(hexdec(substr($value, 2, 8)));
            default:
                throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
        }
    }

    /**
     * Get the UTF-8 character for the given code point.
     *
     * The code point is first reduced modulo 0x200000 (21 bits). No check is
     * made for surrogates or for values above U+10FFFF, so such input may
     * yield a byte sequence that is not valid UTF-8.
     *
     * @param int $c The code point
     *
     * @return string The 1 to 4 byte sequence encoding the code point
     */
    private static function utf8chr(int $c): string
    {
        $c %= 0x200000;

        if (0x80 > $c) {
            // 1 byte: 0xxxxxxx
            return \chr($c);
        }
        if (0x800 > $c) {
            // 2 bytes: 110xxxxx 10xxxxxx
            return \chr(0xC0 | ($c >> 6)).\chr(0x80 | ($c & 0x3F));
        }
        if (0x10000 > $c) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            return \chr(0xE0 | ($c >> 12)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
        }

        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return \chr(0xF0 | ($c >> 18)).\chr(0x80 | (($c >> 12) & 0x3F)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
    }
}
139