<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\Yaml;

use Symfony\Component\Yaml\Exception\ParseException;

/**
 * Unescaper encapsulates unescaping rules for single and double-quoted
 * YAML strings.
 *
 * @author Matthew Lewinski <matthew@lewinski.org>
 *
 * @internal
 */
class Unescaper
{
    /**
     * Regex fragment that matches an escaped character in a double quoted string.
     *
     * It matches a backslash followed by either a hexadecimal escape
     * (xHH, uHHHH or UHHHHHHHH) or any single character. The fragment has no
     * delimiters or modifiers; callers wrap it themselves.
     */
    public const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

    /**
     * Unescapes a single quoted string.
     *
     * The only escape handled is the doubled single quote (''), which is
     * collapsed into one single quote.
     *
     * @param string $value A single quoted string
     *
     * @return string The string with every '' replaced by '
     */
    public function unescapeSingleQuotedString(string $value): string
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
     * character it stands for.
     *
     * @param string $value A double quoted string
     *
     * @return string The string with all escape sequences resolved
     *
     * @throws ParseException When an unknown escape sequence is found, or when
     *                        the PCRE engine cannot process the value (for
     *                        example because it is not valid UTF-8)
     */
    public function unescapeDoubleQuotedString(string $value): string
    {
        $callback = function (array $match): string {
            return $this->unescapeCharacter($match[0]);
        };

        $unescaped = preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);

        // preg_replace_callback() returns null on failure; returning that as-is
        // would violate the declared string return type and raise a TypeError.
        if (null === $unescaped) {
            throw new ParseException(sprintf('Unable to unescape the double quoted string (PCRE error code %d).', preg_last_error()));
        }

        return $unescaped;
    }

    /**
     * Unescapes a character that was found in a double-quoted string.
     *
     * @param string $value An escaped character: the backslash followed by the
     *                      escape code and, for x/u/U, its hexadecimal digits
     *
     * @return string The bytes the escape sequence stands for
     *
     * @throws ParseException When the escape code is not recognized
     */
    private function unescapeCharacter(string $value): string
    {
        // $value[0] is the backslash; $value[1] selects the escape.
        switch ($value[1]) {
            case '0':
                return "\x00";
            case 'a':
                return "\x07";
            case 'b':
                return "\x08";
            case 't':
            case "\t":
                // Both "\t" spelled out and a backslash before a literal tab yield a tab.
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\x0B";
            case 'f':
                return "\x0C";
            case 'r':
                return "\r";
            case 'e':
                return "\x1B";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return "\xC2\x85";
            case '_':
                // U+00A0 NO-BREAK SPACE
                return "\xC2\xA0";
            case 'L':
                // U+2028 LINE SEPARATOR
                return "\xE2\x80\xA8";
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return "\xE2\x80\xA9";
            case 'x':
                // Two hexadecimal digits follow the escape code.
                return self::utf8chr(hexdec(substr($value, 2, 2)));
            case 'u':
                // Four hexadecimal digits follow the escape code.
                return self::utf8chr(hexdec(substr($value, 2, 4)));
            case 'U':
                // Eight hexadecimal digits follow the escape code.
                return self::utf8chr(hexdec(substr($value, 2, 8)));
            default:
                throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
        }
    }

    /**
     * Get the UTF-8 character for the given code point.
     *
     * The code point is first reduced modulo 0x200000, so it always fits in
     * the 21 bits a four-byte sequence can carry. No check is made that the
     * result is a valid Unicode scalar value.
     *
     * @param int $c The code point
     *
     * @return string The one to four byte encoding of the code point
     */
    private static function utf8chr(int $c): string
    {
        $c %= 0x200000;

        if ($c < 0x80) {
            // Single byte.
            return \chr($c);
        }

        if ($c < 0x800) {
            // Two bytes: 110xxxxx 10xxxxxx
            return \chr(0xC0 | ($c >> 6)).\chr(0x80 | ($c & 0x3F));
        }

        if ($c < 0x10000) {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            return \chr(0xE0 | ($c >> 12)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
        }

        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return \chr(0xF0 | ($c >> 18)).\chr(0x80 | (($c >> 12) & 0x3F)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
    }
}