<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\Yaml;

use Symfony\Component\Yaml\Exception\ParseException;

/**
 * Unescaper encapsulates unescaping rules for single and double-quoted
 * YAML strings.
 *
 * @author Matthew Lewinski <matthew@lewinski.org>
 *
 * @internal
 */
class Unescaper
{
    /**
     * Regex fragment that matches an escaped character in a double quoted string.
     *
     * It matches a backslash followed by a hexadecimal escape (xXX, uXXXX or
     * UXXXXXXXX) or, failing that, by any single character accepted by ".".
     * The fragment carries no delimiters or modifiers so that it can be
     * embedded into a larger pattern.
     */
    const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

    /**
     * Unescapes a single quoted string.
     *
     * The only escape handled here is a doubled single quote, which is
     * collapsed into one single quote.
     *
     * @param string $value A single quoted string
     *
     * @return string The unescaped string
     */
    public function unescapeSingleQuotedString(string $value): string
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
     * character it stands for; everything else is left untouched.
     *
     * @param string $value A double quoted string
     *
     * @return string The unescaped string
     *
     * @throws ParseException When an unknown escape sequence is found, or when
     *                        the PCRE engine fails to process the value
     */
    public function unescapeDoubleQuotedString(string $value): string
    {
        $callback = function ($match) {
            return $this->unescapeCharacter($match[0]);
        };

        // evaluate the string
        $unescaped = preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);

        if (null === $unescaped) {
            // preg_replace_callback() returns null when PCRE fails, e.g. when the
            // "u" modifier is given a subject that is not valid UTF-8. Returning
            // that null would violate the declared string return type, so report
            // the problem the same way as any other malformed input.
            throw new ParseException(sprintf('Unable to unescape the double quoted string (PCRE error code %d); the value may not be valid UTF-8.', preg_last_error()));
        }

        return $unescaped;
    }

    /**
     * Unescapes a character that was found in a double-quoted string.
     *
     * The byte right after the backslash selects the replacement; for the
     * x, u and U forms the following 2, 4 or 8 hexadecimal digits are decoded
     * and converted to UTF-8.
     *
     * @param string $value An escaped character (the backslash included)
     *
     * @return string The unescaped character
     *
     * @throws ParseException When the escape sequence is not a known one
     */
    private function unescapeCharacter(string $value): string
    {
        switch ($value[1]) {
            case '0':
                return "\x0";
            case 'a':
                return "\x7";
            case 'b':
                return "\x8";
            case 't':
                return "\t";
            case "\t":
                // a backslash followed by a literal tab also yields a tab
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\xB";
            case 'f':
                return "\xC";
            case 'r':
                return "\r";
            case 'e':
                return "\x1B";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return "\xC2\x85";
            case '_':
                // U+00A0 NO-BREAK SPACE
                return "\xC2\xA0";
            case 'L':
                // U+2028 LINE SEPARATOR
                return "\xE2\x80\xA8";
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return "\xE2\x80\xA9";
            case 'x':
                return self::utf8chr(hexdec(substr($value, 2, 2)));
            case 'u':
                return self::utf8chr(hexdec(substr($value, 2, 4)));
            case 'U':
                return self::utf8chr(hexdec(substr($value, 2, 8)));
            default:
                throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
        }
    }

    /**
     * Get the UTF-8 character for the given code point.
     *
     * The code point is first reduced modulo 0x200000, i.e. to the 21 bits a
     * four-byte sequence built below can carry. No further validation is done:
     * the bytes are produced purely from the numeric value.
     *
     * @param int $c The code point to encode
     *
     * @return string The one to four byte sequence encoding the code point
     */
    private static function utf8chr(int $c): string
    {
        // note: the modulo is assigned back to $c before the comparison
        if (0x80 > $c %= 0x200000) {
            return \chr($c);
        }
        if (0x800 > $c) {
            return \chr(0xC0 | $c >> 6).\chr(0x80 | $c & 0x3F);
        }
        if (0x10000 > $c) {
            return \chr(0xE0 | $c >> 12).\chr(0x80 | $c >> 6 & 0x3F).\chr(0x80 | $c & 0x3F);
        }

        return \chr(0xF0 | $c >> 18).\chr(0x80 | $c >> 12 & 0x3F).\chr(0x80 | $c >> 6 & 0x3F).\chr(0x80 | $c & 0x3F);
    }
}