1<?php
2
3/*
4 * This file is part of the Symfony package.
5 *
6 * (c) Fabien Potencier <fabien@symfony.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Symfony\Component\Yaml;
13
14use Symfony\Component\Yaml\Exception\ParseException;
15
16/**
17 * Unescaper encapsulates unescaping rules for single and double-quoted
18 * YAML strings.
19 *
20 * @author Matthew Lewinski <matthew@lewinski.org>
21 *
22 * @internal
23 */
class Unescaper
{
    /**
     * Regex fragment that matches an escaped character in a double quoted string.
     *
     * It matches a backslash followed by either a hexadecimal escape
     * (xHH, uHHHH or UHHHHHHHH) or any single character other than a newline.
     * The fragment has no delimiters; callers wrap it themselves.
     */
    public const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

    /**
     * Unescapes a single quoted string.
     *
     * The only escape handled here is a doubled single quote (''), which is
     * replaced by one single quote.
     *
     * @param string $value A single quoted string
     */
    public function unescapeSingleQuotedString(string $value): string
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
     * character(s) it stands for; everything else is left untouched.
     *
     * @param string $value A double quoted string
     *
     * @throws ParseException When an unknown escape sequence is found, or when
     *                        the PCRE engine fails to process the value (for
     *                        instance because it is not valid UTF-8)
     */
    public function unescapeDoubleQuotedString(string $value): string
    {
        $callback = function ($match) {
            return $this->unescapeCharacter($match[0]);
        };

        // evaluate the string
        $unescaped = preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);

        // preg_replace_callback() returns null on failure; report that as a
        // parse error instead of letting the string return type raise a TypeError.
        if (null === $unescaped) {
            throw new ParseException(sprintf('Unable to unescape the double quoted string (PCRE error code %d).', preg_last_error()));
        }

        return $unescaped;
    }

    /**
     * Unescapes a character that was found in a double-quoted string.
     *
     * @param string $value An escaped character, including its leading backslash
     *
     * @throws ParseException When the escape sequence is not a known one
     */
    private function unescapeCharacter(string $value): string
    {
        // $value[0] is the backslash; the byte right after it selects the escape.
        switch ($value[1]) {
            case '0':
                return "\x0";
            case 'a':
                return "\x7";
            case 'b':
                return "\x8";
            case 't':
            case "\t":
                // both "\t" spelled out and a backslash followed by a literal tab yield a tab
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\xB";
            case 'f':
                return "\xC";
            case 'r':
                return "\r";
            case 'e':
                return "\x1B";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return "\xC2\x85";
            case '_':
                // U+00A0 NO-BREAK SPACE
                return "\xC2\xA0";
            case 'L':
                // U+2028 LINE SEPARATOR
                return "\xE2\x80\xA8";
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return "\xE2\x80\xA9";
            case 'x':
                // two hexadecimal digits follow the "\x" prefix
                return self::utf8chr(hexdec(substr($value, 2, 2)));
            case 'u':
                // four hexadecimal digits follow the "\u" prefix
                return self::utf8chr(hexdec(substr($value, 2, 4)));
            case 'U':
                // eight hexadecimal digits follow the "\U" prefix
                return self::utf8chr(hexdec(substr($value, 2, 8)));
            default:
                throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
        }
    }

    /**
     * Get the UTF-8 character for the given code point.
     *
     * The code point is first reduced modulo 0x200000, so larger values wrap
     * around; the result is then encoded on one to four bytes depending on
     * its magnitude. No further validation of the code point is performed.
     */
    private static function utf8chr(int $c): string
    {
        // one byte: 0xxxxxxx
        if (0x80 > $c %= 0x200000) {
            return \chr($c);
        }
        // two bytes: 110xxxxx 10xxxxxx
        if (0x800 > $c) {
            return \chr(0xC0 | $c >> 6).\chr(0x80 | $c & 0x3F);
        }
        // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        if (0x10000 > $c) {
            return \chr(0xE0 | $c >> 12).\chr(0x80 | $c >> 6 & 0x3F).\chr(0x80 | $c & 0x3F);
        }

        // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return \chr(0xF0 | $c >> 18).\chr(0x80 | $c >> 12 & 0x3F).\chr(0x80 | $c >> 6 & 0x3F).\chr(0x80 | $c & 0x3F);
    }
}
133