<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\Yaml;

use Symfony\Component\Yaml\Exception\ParseException;

/**
 * Unescaper encapsulates unescaping rules for single and double-quoted
 * YAML strings.
 *
 * @author Matthew Lewinski <matthew@lewinski.org>
 *
 * @internal
 */
class Unescaper
{
    /**
     * Regex fragment that matches an escaped character in a double quoted string.
     *
     * Matches a backslash followed by either a fixed-width hexadecimal escape
     * (x + 2 digits, u + 4 digits, U + 8 digits) or any single character.
     */
    public const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

    /**
     * Unescapes a single quoted string.
     *
     * The only escape handled here is a doubled single quote, which is
     * collapsed into one single quote.
     *
     * @param string $value A single quoted string
     *
     * @return string The unescaped string
     */
    public function unescapeSingleQuotedString(string $value): string
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every match of REGEX_ESCAPED_CHARACTER is replaced by the character it
     * denotes; everything else is left untouched.
     *
     * @param string $value A double quoted string
     *
     * @return string The unescaped string
     *
     * @throws ParseException When an unknown escape sequence is found, or when
     *                        the regular expression engine fails on $value
     *                        (e.g. because it is not valid UTF-8)
     */
    public function unescapeDoubleQuotedString(string $value): string
    {
        $callback = function ($match) {
            return $this->unescapeCharacter($match[0]);
        };

        // evaluate the string
        $unescaped = preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);

        // preg_replace_callback() yields null on a PCRE error; surface that as
        // a ParseException rather than letting the string return type fail.
        if (null === $unescaped) {
            $errorCode = preg_last_error();

            if (\PREG_BAD_UTF8_ERROR === $errorCode) {
                throw new ParseException('The YAML value does not appear to be valid UTF-8.');
            }

            throw new ParseException(sprintf('Unable to unescape the double-quoted string (PCRE error code %d).', $errorCode));
        }

        return $unescaped;
    }

    /**
     * Unescapes a character that was found in a double-quoted string.
     *
     * The byte right after the leading backslash selects the replacement.
     *
     * @param string $value An escaped character (the backslash included)
     *
     * @return string The unescaped character
     *
     * @throws ParseException When the escape sequence is not recognised
     */
    private function unescapeCharacter(string $value): string
    {
        switch ($value[1]) {
            case '0':
                return "\x0";
            case 'a':
                return "\x7";
            case 'b':
                return "\x8";
            case 't':
                return "\t";
            case "\t":
                // a backslash followed by a literal tab also yields a tab
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\xB";
            case 'f':
                return "\xC";
            case 'r':
                return "\r";
            case 'e':
                return "\x1B";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return "\xC2\x85";
            case '_':
                // U+00A0 NO-BREAK SPACE
                return "\xC2\xA0";
            case 'L':
                // U+2028 LINE SEPARATOR
                return "\xE2\x80\xA8";
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return "\xE2\x80\xA9";
            case 'x':
                // 2 hexadecimal digits follow the "x"
                return self::utf8chr(hexdec(substr($value, 2, 2)));
            case 'u':
                // 4 hexadecimal digits follow the "u"
                return self::utf8chr(hexdec(substr($value, 2, 4)));
            case 'U':
                // 8 hexadecimal digits follow the "U"
                return self::utf8chr(hexdec(substr($value, 2, 8)));
            default:
                throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
        }
    }

    /**
     * Get the UTF-8 character for the given code point.
     *
     * The code point is first reduced modulo 0x200000, then encoded on one to
     * four bytes depending on its magnitude. No further validation of the
     * code point is performed here.
     *
     * @param int $c The code point
     *
     * @return string The encoded byte sequence
     */
    private static function utf8chr(int $c): string
    {
        // keep only what fits in a 4-byte sequence (21 bits)
        $c %= 0x200000;

        if ($c < 0x80) {
            return \chr($c);
        }

        if ($c < 0x800) {
            return \chr(0xC0 | ($c >> 6)).\chr(0x80 | ($c & 0x3F));
        }

        if ($c < 0x10000) {
            return \chr(0xE0 | ($c >> 12)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
        }

        return \chr(0xF0 | ($c >> 18)).\chr(0x80 | (($c >> 12) & 0x3F)).\chr(0x80 | (($c >> 6) & 0x3F)).\chr(0x80 | ($c & 0x3F));
    }
}