<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Polyfill\Mbstring;

/**
 * Partial mbstring implementation in PHP, iconv based, UTF-8 centric.
 *
 * Implemented:
 * - mb_check_encoding       - Check if the string is valid for the specified encoding
 * - mb_chr                  - Returns a specific character from its Unicode code point
 * - mb_convert_encoding     - Convert character encoding
 * - mb_convert_variables    - Convert character code in variable(s)
 * - mb_decode_mimeheader    - Decode string in MIME header field
 * - mb_encode_mimeheader    - Encode string for MIME header XXX NATIVE IMPLEMENTATION IS REALLY BUGGED
 * - mb_decode_numericentity - Decode HTML numeric string reference to character
 * - mb_encode_numericentity - Encode character to HTML numeric string reference
 * - mb_convert_case         - Perform case folding on a string
 * - mb_detect_encoding      - Detect character encoding
 * - mb_detect_order         - Set/Get character encoding detection order
 * - mb_encoding_aliases     - Get aliases of a known encoding type
 * - mb_get_info             - Get internal settings of mbstring
 * - mb_http_input           - Detect HTTP input character encoding
 * - mb_http_output          - Set/Get HTTP output character encoding
 * - mb_internal_encoding    - Set/Get internal character encoding
 * - mb_language             - Set/Get current language
 * - mb_list_encodings       - Returns an array of all supported encodings
 * - mb_ord                  - Returns the Unicode code point of a character
 * - mb_output_handler       - Callback function converts character encoding in output buffer
 * - mb_scrub                - Replaces ill-formed byte sequences with substitute characters
 * - mb_strlen               - Get string length
 * - mb_strpos               - Find position of first occurrence of string in a string
 * - mb_strrpos              - Find position of last occurrence of a string in a string
 * - mb_str_split            - Convert a string to an array
 * - mb_strtolower           - Make a string lowercase
 * - mb_strtoupper           - Make a string uppercase
 * - mb_substitute_character - Set/Get substitution character
 * - mb_substr               - Get part of string
 * - mb_stripos              - Finds position of first occurrence of a string within another, case insensitive
 * - mb_stristr              - Finds first occurrence of a string within another, case insensitive
 * - mb_strrchr              - Finds the last occurrence of a character in a string within another
 * - mb_strrichr             - Finds the last occurrence of a character in a string within another, case insensitive
 * - mb_strripos             - Finds position of last occurrence of a string within another, case insensitive
 * - mb_strstr               - Finds first occurrence of a string within another
 * - mb_strwidth             - Return width of string
 * - mb_substr_count         - Count the number of substring occurrences
 *
 * Not implemented:
 * - mb_convert_kana         - Convert "kana" one from another ("zen-kaku", "han-kaku" and more)
 * - mb_ereg_*               - Regular expression with multibyte support
 * - mb_parse_str            - Parse GET/POST/COOKIE data and set global variable
 * - mb_preferred_mime_name  - Get MIME charset string
 * - mb_regex_encoding       - Returns current encoding for multibyte regex as string
 * - mb_regex_set_options    - Set/Get the default options for mbregex functions
 * - mb_send_mail            - Send encoded mail
 * - mb_split                - Split multibyte string using regular expression
 * - mb_strcut               - Get part of string
 * - mb_strimwidth           - Get truncated string with specified width
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
final class Mbstring
{
    /** Private pseudo-mode understood by mb_convert_case(): lower-casing preceded by the CASE_FOLD substitutions. */
    public const MB_CASE_FOLD = \PHP_INT_MAX;

    /**
     * Extra substitutions applied before lower-casing in MB_CASE_FOLD mode.
     * Index 0 holds the search strings, index 1 the replacement at the same position.
     */
    private const CASE_FOLD = [
        ['µ', 'ſ', "\xCD\x85", 'ς', "\xCF\x90", "\xCF\x91", "\xCF\x95", "\xCF\x96", "\xCF\xB0", "\xCF\xB1", "\xCF\xB5", "\xE1\xBA\x9B", "\xE1\xBE\xBE"],
        ['μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', "\xE1\xB9\xA1", 'ι'],
    ];

    /** Detection order used by mb_detect_encoding() when no list is given; replaced by mb_detect_order(). */
    private static $encodingList = ['ASCII', 'UTF-8'];
    /** Current language as set by mb_language(); only 'neutral' and 'uni' are accepted. */
    private static $language = 'neutral';
    /** Current internal encoding as set by mb_internal_encoding(). */
    private static $internalEncoding = 'UTF-8';

    /**
     * Converts $s from $fromEncoding to $toEncoding.
     *
     * BASE64 and HTML-ENTITIES/HTML are handled here; every other conversion is
     * delegated to iconv() with "//IGNORE" appended to the target encoding.
     *
     * @param string               $s            the string to convert
     * @param string               $toEncoding   target encoding
     * @param string|string[]|null $fromEncoding source encoding; an array or a comma separated
     *                                           list is resolved through mb_detect_encoding()
     *
     * @return string|false|null whatever the final iconv()/base64/preg call returns
     */
    public static function mb_convert_encoding($s, $toEncoding, $fromEncoding = null)
    {
        if (\is_array($fromEncoding) || ($fromEncoding !== null && false !== strpos($fromEncoding, ','))) {
            $fromEncoding = self::mb_detect_encoding($s, $fromEncoding);
        } else {
            $fromEncoding = self::getEncoding($fromEncoding);
        }

        $toEncoding = self::getEncoding($toEncoding);

        if ('BASE64' === $fromEncoding) {
            $s = base64_decode($s);
            // The decoded payload is treated as already being in the target encoding.
            $fromEncoding = $toEncoding;
        }

        if ('BASE64' === $toEncoding) {
            return base64_encode($s);
        }

        if ('HTML-ENTITIES' === $toEncoding || 'HTML' === $toEncoding) {
            if ('HTML-ENTITIES' === $fromEncoding || 'HTML' === $fromEncoding) {
                $fromEncoding = 'Windows-1252';
            }
            if ('UTF-8' !== $fromEncoding) {
                $s = \iconv($fromEncoding, 'UTF-8//IGNORE', $s);
            }

            // Only runs of non-ASCII bytes are handed to the entity encoder callback.
            return preg_replace_callback('/[\x80-\xFF]+/', [__CLASS__, 'html_encoding_callback'], $s);
        }

        // "HTML" is accepted as an alias here as well, consistently with the target-side checks above.
        if ('HTML-ENTITIES' === $fromEncoding || 'HTML' === $fromEncoding) {
            $s = html_entity_decode($s, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = 'UTF-8';
        }

        return \iconv($fromEncoding, $toEncoding.'//IGNORE', $s);
    }

    /**
     * Converts, in place, every leaf value of the given variables with mb_convert_encoding().
     *
     * @param string          $toEncoding   target encoding
     * @param string|string[] $fromEncoding source encoding, passed through to mb_convert_encoding()
     * @param mixed           ...$vars      variables (scalars or nested arrays) converted by reference
     *
     * @return string|string[]|false $fromEncoding as given, or false if any conversion returned false
     */
    public static function mb_convert_variables($toEncoding, $fromEncoding, &...$vars)
    {
        $ok = true;
        array_walk_recursive($vars, function (&$v) use (&$ok, $toEncoding, $fromEncoding) {
            if (false === $v = self::mb_convert_encoding($v, $toEncoding, $fromEncoding)) {
                $ok = false;
            }
        });

        return $ok ? $fromEncoding : false;
    }

    /**
     * Decodes a MIME header field into the current internal encoding.
     *
     * @param string $s the encoded header value
     *
     * @return string|false the result of iconv_mime_decode()
     */
    public static function mb_decode_mimeheader($s)
    {
        // Mode 2 is ICONV_MIME_DECODE_CONTINUE_ON_ERROR.
        return \iconv_mime_decode($s, 2, self::$internalEncoding);
    }

    /**
     * Not supported: only raises an E_USER_WARNING pointing to iconv_mime_encode().
     *
     * @return void no value is returned, so callers receive null
     */
    public static function mb_encode_mimeheader($s, $charset = null, $transferEncoding = null, $linefeed = null, $indent = null)
    {
        trigger_error('mb_encode_mimeheader() is bugged. Please use iconv_mime_encode() instead', \E_USER_WARNING);
    }

    /**
     * Replaces decimal (&#N;) and hexadecimal (&#xN;) references in $s by the matching character.
     *
     * $convmap is read in groups of four integers [start, end, offset, mask]; a
     * reference is decoded when its value lies within [start + offset, end + offset],
     * and the mask entry is ignored. References outside every range are left untouched.
     *
     * @param string|null $s        the string to decode
     * @param int[]       $convmap  conversion map
     * @param string|null $encoding encoding of $s, normalized through getEncoding()
     *
     * @return string|false|null null for an invalid $s, false for an invalid $convmap,
     *                           '' for an invalid $encoding or an empty $s
     */
    public static function mb_decode_numericentity($s, $convmap, $encoding = null)
    {
        if (null !== $s && !is_scalar($s) && !(\is_object($s) && method_exists($s, '__toString'))) {
            trigger_error('mb_decode_numericentity() expects parameter 1 to be string, '.\gettype($s).' given', \E_USER_WARNING);

            return null;
        }

        if (!\is_array($convmap) || (80000 > \PHP_VERSION_ID && !$convmap)) {
            return false;
        }

        if (null !== $encoding && !is_scalar($encoding)) {
            // Report the type of the offending argument ($encoding), not of $s.
            trigger_error('mb_decode_numericentity() expects parameter 3 to be string, '.\gettype($encoding).' given', \E_USER_WARNING);

            return ''; // Instead of null (cf. mb_encode_numericentity).
        }

        $s = (string) $s;
        if ('' === $s) {
            return '';
        }

        $encoding = self::getEncoding($encoding);

        if ('UTF-8' === $encoding) {
            // null marks "no conversion back needed" for the end of the method.
            $encoding = null;
            if (!preg_match('//u', $s)) {
                $s = @\iconv('UTF-8', 'UTF-8//IGNORE', $s);
            }
        } else {
            $s = \iconv($encoding, 'UTF-8//IGNORE', $s);
        }

        $cnt = floor(\count($convmap) / 4) * 4;

        for ($i = 0; $i < $cnt; $i += 4) {
            // collector_decode_htmlnumericentity ignores $convmap[$i + 3]
            $convmap[$i] += $convmap[$i + 2];
            $convmap[$i + 1] += $convmap[$i + 2];
        }

        $s = preg_replace_callback('/&#(?:0*([0-9]+)|x0*([0-9a-fA-F]+))(?!&);?/', function (array $m) use ($cnt, $convmap) {
            $c = isset($m[2]) ? (int) hexdec($m[2]) : $m[1];
            for ($i = 0; $i < $cnt; $i += 4) {
                if ($c >= $convmap[$i] && $c <= $convmap[$i + 1]) {
                    return self::mb_chr($c - $convmap[$i + 2]);
                }
            }

            return $m[0];
        }, $s);

        if (null === $encoding) {
            return $s;
        }

        return \iconv('UTF-8', $encoding.'//IGNORE', $s);
    }

    /**
     * Replaces the characters of $s covered by $convmap with numeric references.
     *
     * $convmap is read in groups of four integers [start, end, offset, mask]; a
     * character whose code point lies within [start, end] is emitted as
     * (code point + offset) & mask, in decimal or, when $is_hex is truthy, in
     * upper-case hexadecimal.
     *
     * @param string|null $s        the string to encode
     * @param int[]       $convmap  conversion map
     * @param string|null $encoding encoding of $s, normalized through getEncoding()
     * @param bool        $is_hex   whether to emit &#x…; instead of &#…;
     *
     * @return string|false|null null for an invalid $s, $encoding or $is_hex,
     *                           false for an invalid $convmap, '' for an empty $s
     */
    public static function mb_encode_numericentity($s, $convmap, $encoding = null, $is_hex = false)
    {
        if (null !== $s && !is_scalar($s) && !(\is_object($s) && method_exists($s, '__toString'))) {
            trigger_error('mb_encode_numericentity() expects parameter 1 to be string, '.\gettype($s).' given', \E_USER_WARNING);

            return null;
        }

        if (!\is_array($convmap) || (80000 > \PHP_VERSION_ID && !$convmap)) {
            return false;
        }

        if (null !== $encoding && !is_scalar($encoding)) {
            // Report the type of the offending argument ($encoding), not of $s.
            trigger_error('mb_encode_numericentity() expects parameter 3 to be string, '.\gettype($encoding).' given', \E_USER_WARNING);

            return null; // Instead of '' (cf. mb_decode_numericentity).
        }

        if (null !== $is_hex && !is_scalar($is_hex)) {
            // Report the type of the offending argument ($is_hex), not of $s.
            trigger_error('mb_encode_numericentity() expects parameter 4 to be boolean, '.\gettype($is_hex).' given', \E_USER_WARNING);

            return null;
        }

        $s = (string) $s;
        if ('' === $s) {
            return '';
        }

        $encoding = self::getEncoding($encoding);

        if ('UTF-8' === $encoding) {
            // null marks "no conversion back needed" for the end of the method.
            $encoding = null;
            if (!preg_match('//u', $s)) {
                $s = @\iconv('UTF-8', 'UTF-8//IGNORE', $s);
            }
        } else {
            $s = \iconv($encoding, 'UTF-8//IGNORE', $s);
        }

        // Byte length of a UTF-8 sequence, keyed by the high nibble of its lead byte.
        static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4];

        $cnt = floor(\count($convmap) / 4) * 4;
        $i = 0;
        $len = \strlen($s);
        $result = '';

        while ($i < $len) {
            $ulen = $s[$i] < "\x80" ? 1 : $ulenMask[$s[$i] & "\xF0"];
            $uchr = substr($s, $i, $ulen);
            $i += $ulen;
            $c = self::mb_ord($uchr);

            for ($j = 0; $j < $cnt; $j += 4) {
                if ($c >= $convmap[$j] && $c <= $convmap[$j + 1]) {
                    $cOffset = ($c + $convmap[$j + 2]) & $convmap[$j + 3];
                    $result .= $is_hex ? sprintf('&#x%X;', $cOffset) : '&#'.$cOffset.';';
                    continue 2;
                }
            }
            $result .= $uchr;
        }

        if (null === $encoding) {
            return $result;
        }

        return \iconv('UTF-8', $encoding.'//IGNORE', $result);
    }

    /**
     * Changes the case of $s.
     *
     * MB_CASE_TITLE applies the title_case callback to the matches of the
     * 'titleCaseRegexp' data entry. MB_CASE_UPPER maps characters through the
     * 'upperCase' table. Any other mode maps through the 'lowerCase' table, with
     * self::MB_CASE_FOLD first applying the CASE_FOLD substitutions.
     *
     * @param string      $s        the string to convert
     * @param int         $mode     one of the MB_CASE_* constants or self::MB_CASE_FOLD
     * @param string|null $encoding encoding of $s, normalized through getEncoding()
     *
     * @return string|false '' for an empty $s, otherwise the converted string in $encoding
     */
    public static function mb_convert_case($s, $mode, $encoding = null)
    {
        $s = (string) $s;
        if ('' === $s) {
            return '';
        }

        $encoding = self::getEncoding($encoding);

        if ('UTF-8' === $encoding) {
            // null marks "no conversion back needed" for the end of the method.
            $encoding = null;
            if (!preg_match('//u', $s)) {
                $s = @\iconv('UTF-8', 'UTF-8//IGNORE', $s);
            }
        } else {
            $s = \iconv($encoding, 'UTF-8//IGNORE', $s);
        }

        if (\MB_CASE_TITLE == $mode) {
            static $titleRegexp = null;
            if (null === $titleRegexp) {
                $titleRegexp = self::getData('titleCaseRegexp');
            }
            $s = preg_replace_callback($titleRegexp, [__CLASS__, 'title_case'], $s);
        } else {
            if (\MB_CASE_UPPER == $mode) {
                static $upper = null;
                if (null === $upper) {
                    $upper = self::getData('upperCase');
                }
                $map = $upper;
            } else {
                if (self::MB_CASE_FOLD === $mode) {
                    $s = str_replace(self::CASE_FOLD[0], self::CASE_FOLD[1], $s);
                }

                static $lower = null;
                if (null === $lower) {
                    $lower = self::getData('lowerCase');
                }
                $map = $lower;
            }

            // Byte length of a UTF-8 sequence, keyed by the high nibble of its lead byte.
            static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4];

            $i = 0;
            $len = \strlen($s);

            while ($i < $len) {
                $ulen = $s[$i] < "\x80" ? 1 : $ulenMask[$s[$i] & "\xF0"];
                $uchr = substr($s, $i, $ulen);
                $i += $ulen;

                if (isset($map[$uchr])) {
                    $uchr = $map[$uchr];
                    $nlen = \strlen($uchr);

                    if ($nlen == $ulen) {
                        // Same byte length: overwrite in place, walking backwards from the end.
                        $nlen = $i;
                        do {
                            $s[--$nlen] = $uchr[--$ulen];
                        } while ($ulen);
                    } else {
                        // Different byte length: splice and shift the cursor and total length.
                        $s = substr_replace($s, $uchr, $i - $ulen, $ulen);
                        $len += $nlen - $ulen;
                        $i += $nlen - $ulen;
                    }
                }
            }
        }

        if (null === $encoding) {
            return $s;
        }

        return \iconv('UTF-8', $encoding.'//IGNORE', $s);
    }

    /**
     * Gets or sets the internal encoding.
     *
     * @param string|null $encoding null to read the current value
     *
     * @return string|bool the current encoding when $encoding is null; true once set;
     *                     false for an encoding iconv rejects on PHP < 8
     *
     * @throws \ValueError on PHP >= 8 when iconv rejects the encoding
     */
    public static function mb_internal_encoding($encoding = null)
    {
        if (null === $encoding) {
            return self::$internalEncoding;
        }

        $normalizedEncoding = self::getEncoding($encoding);

        // A no-op iconv() round trip is used to probe whether the encoding is known.
        if ('UTF-8' === $normalizedEncoding || false !== @\iconv($normalizedEncoding, $normalizedEncoding, ' ')) {
            self::$internalEncoding = $normalizedEncoding;

            return true;
        }

        if (80000 > \PHP_VERSION_ID) {
            return false;
        }

        throw new \ValueError(sprintf('Argument #1 ($encoding) must be a valid encoding, "%s" given', $encoding));
    }

    /**
     * Gets or sets the current language; only "uni" and "neutral" (case-insensitive) are accepted.
     *
     * @param string|null $lang null to read the current value
     *
     * @return string|bool the current language when $lang is null; true once set;
     *                     false for any other value on PHP < 8
     *
     * @throws \ValueError on PHP >= 8 for any other value
     */
    public static function mb_language($lang = null)
    {
        if (null === $lang) {
            return self::$language;
        }

        switch ($normalizedLang = strtolower($lang)) {
            case 'uni':
            case 'neutral':
                self::$language = $normalizedLang;

                return true;
        }

        if (80000 > \PHP_VERSION_ID) {
            return false;
        }

        throw new \ValueError(sprintf('Argument #1 ($language) must be a valid language, "%s" given', $lang));
    }

    /**
     * @return string[] always ['UTF-8']
     */
    public static function mb_list_encodings()
    {
        return ['UTF-8'];
    }

    /**
     * @param string $encoding encoding name, compared case-insensitively
     *
     * @return string[]|false ['utf8'] for "UTF8"/"UTF-8", false for anything else
     */
    public static function mb_encoding_aliases($encoding)
    {
        switch (strtoupper($encoding)) {
            case 'UTF8':
            case 'UTF-8':
                return ['utf8'];
        }

        return false;
    }

    /**
     * Checks whether $var is valid in $encoding (the internal encoding when null).
     *
     * @param string|null $var      the value to check
     * @param string|null $encoding the expected encoding
     *
     * @return bool false when both arguments are null; otherwise true when
     *              mb_detect_encoding() accepts the value or an iconv() round trip succeeds
     */
    public static function mb_check_encoding($var = null, $encoding = null)
    {
        if (null === $encoding) {
            if (null === $var) {
                return false;
            }
            $encoding = self::$internalEncoding;
        }

        return self::mb_detect_encoding($var, [$encoding]) || false !== @\iconv($encoding, $encoding, $var);
    }

    /**
     * Returns the first encoding of $encodingList that matches $str.
     *
     * ASCII matches when no byte >= 0x80 is present, UTF-8/UTF8 when the string
     * passes a PCRE "u" check, and any ISO-8859-* entry matches without the
     * string being inspected. Other entries never match.
     *
     * @param string               $str          the string to inspect
     * @param string|string[]|null $encodingList array or comma separated list; null uses the detect order
     * @param bool                 $strict       accepted for signature compatibility, not used
     *
     * @return string|false the matching encoding name, or false
     */
    public static function mb_detect_encoding($str, $encodingList = null, $strict = false)
    {
        if (null === $encodingList) {
            $encodingList = self::$encodingList;
        } else {
            if (!\is_array($encodingList)) {
                $encodingList = array_map('trim', explode(',', $encodingList));
            }
            $encodingList = array_map('strtoupper', $encodingList);
        }

        foreach ($encodingList as $enc) {
            switch ($enc) {
                case 'ASCII':
                    if (!preg_match('/[\x80-\xFF]/', $str)) {
                        return $enc;
                    }
                    break;

                case 'UTF8':
                case 'UTF-8':
                    if (preg_match('//u', $str)) {
                        return 'UTF-8';
                    }
                    break;

                default:
                    if (0 === strncmp($enc, 'ISO-8859-', 9)) {
                        return $enc;
                    }
            }
        }

        return false;
    }

    /**
     * Gets or sets the detection order used by mb_detect_encoding().
     *
     * @param string|string[]|null $encodingList array or comma separated list; null to read
     *
     * @return string[]|bool the current order when $encodingList is null; true once set;
     *                       false (order unchanged) if any entry is not ASCII, UTF8, UTF-8 or ISO-8859-*
     */
    public static function mb_detect_order($encodingList = null)
    {
        if (null === $encodingList) {
            return self::$encodingList;
        }

        if (!\is_array($encodingList)) {
            $encodingList = array_map('trim', explode(',', $encodingList));
        }
        $encodingList = array_map('strtoupper', $encodingList);

        foreach ($encodingList as $enc) {
            switch ($enc) {
                default:
                    if (strncmp($enc, 'ISO-8859-', 9)) {
                        return false;
                    }
                    // no break
                case 'ASCII':
                case 'UTF8':
                case 'UTF-8':
            }
        }

        self::$encodingList = $encodingList;

        return true;
    }

    /**
     * Returns the length of $s in characters.
     *
     * @param string      $s        the string to measure
     * @param string|null $encoding encoding of $s, normalized through getEncoding()
     *
     * @return int|false byte length for CP850/ASCII, otherwise the result of
     *                   iconv_strlen() with its warnings suppressed
     */
    public static function mb_strlen($s, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);
        if ('CP850' === $encoding || 'ASCII' === $encoding) {
            return \strlen($s);
        }

        return @\iconv_strlen($s, $encoding);
    }

    /**
     * Finds the character position of the first occurrence of $needle in $haystack.
     *
     * @param string      $haystack the string to search in
     * @param string      $needle   the string to search for
     * @param int         $offset   search offset
     * @param string|null $encoding encoding of the strings, normalized through getEncoding()
     *
     * @return int|false the position, or false when not found; for an empty needle
     *                   (non CP850/ASCII path) false with a warning on PHP < 8, 0 otherwise
     */
    public static function mb_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);
        if ('CP850' === $encoding || 'ASCII' === $encoding) {
            return strpos($haystack, $needle, $offset);
        }

        $needle = (string) $needle;
        if ('' === $needle) {
            if (80000 > \PHP_VERSION_ID) {
                trigger_error(__METHOD__.': Empty delimiter', \E_USER_WARNING);

                return false;
            }

            return 0;
        }

        return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    /**
     * Finds the character position of the last occurrence of $needle in $haystack.
     *
     * @param string      $haystack the string to search in
     * @param string      $needle   the string to search for
     * @param int         $offset   search offset; non-integer values are treated as 0
     * @param string|null $encoding encoding of the strings, normalized through getEncoding()
     *
     * @return int|false the position, or false when not found
     */
    public static function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);
        if ('CP850' === $encoding || 'ASCII' === $encoding) {
            return strrpos($haystack, $needle, $offset);
        }

        if ($offset != (int) $offset) {
            $offset = 0;
        } elseif ($offset = (int) $offset) {
            if ($offset < 0) {
                // The needle must be measured in the caller's encoding, like every other call here.
                if (0 > $offset += self::mb_strlen($needle, $encoding)) {
                    $haystack = self::mb_substr($haystack, 0, $offset, $encoding);
                }
                $offset = 0;
            } else {
                $haystack = self::mb_substr($haystack, $offset, 2147483647, $encoding);
            }
        }

        // On PHP >= 8 an empty needle matches at the end of the haystack.
        $pos = '' !== $needle || 80000 > \PHP_VERSION_ID
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::mb_strlen($haystack, $encoding);

        return false !== $pos ? $offset + $pos : false;
    }

    /**
     * Splits $string into chunks of $split_length characters.
     *
     * @param string|null $string       the string to split
     * @param int         $split_length chunk length in characters, at least 1
     * @param string|null $encoding     encoding of $string; null uses mb_internal_encoding()
     *
     * @return string[]|false|null null for an invalid $string; false with a warning
     *                             for a length below 1 on PHP < 8
     *
     * @throws \ValueError on PHP >= 8 when $split_length is below 1
     */
    public static function mb_str_split($string, $split_length = 1, $encoding = null)
    {
        if (null !== $string && !is_scalar($string) && !(\is_object($string) && method_exists($string, '__toString'))) {
            trigger_error('mb_str_split() expects parameter 1 to be string, '.\gettype($string).' given', \E_USER_WARNING);

            return null;
        }

        if (1 > $split_length = (int) $split_length) {
            if (80000 > \PHP_VERSION_ID) {
                trigger_error('The length of each segment must be greater than zero', \E_USER_WARNING);

                return false;
            }

            throw new \ValueError('Argument #2 ($length) must be greater than 0');
        }

        if (null === $encoding) {
            $encoding = mb_internal_encoding();
        }

        if ('UTF-8' === $encoding = self::getEncoding($encoding)) {
            // PCRE quantifiers are capped at 65535, so longer chunks are built from several groups.
            $rx = '/(';
            while (65535 < $split_length) {
                $rx .= '.{65535}';
                $split_length -= 65535;
            }
            $rx .= '.{'.$split_length.'})/us';

            // -1 means "no limit"; passing null here is deprecated as of PHP 8.1.
            return preg_split($rx, $string, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY);
        }

        $result = [];
        $length = mb_strlen($string, $encoding);

        for ($i = 0; $i < $length; $i += $split_length) {
            $result[] = mb_substr($string, $i, $split_length, $encoding);
        }

        return $result;
    }

    /**
     * Lower-cases $s; shortcut for mb_convert_case() with MB_CASE_LOWER.
     */
    public static function mb_strtolower($s, $encoding = null)
    {
        return self::mb_convert_case($s, \MB_CASE_LOWER, $encoding);
    }

    /**
     * Upper-cases $s; shortcut for mb_convert_case() with MB_CASE_UPPER.
     */
    public static function mb_strtoupper($s, $encoding = null)
    {
        return self::mb_convert_case($s, \MB_CASE_UPPER, $encoding);
    }

    /**
     * Gets or "sets" the substitution character; only "none" is supported.
     *
     * @param string|int|null $c null to read the current value
     *
     * @return string|bool 'none' when $c is null; true for "none" (case-insensitive);
     *                     false for anything else on PHP < 8, and for an int,
     *                     'long' or 'entity' on PHP >= 8
     *
     * @throws \ValueError on PHP >= 8 for any other value
     */
    public static function mb_substitute_character($c = null)
    {
        if (null === $c) {
            return 'none';
        }
        if (0 === strcasecmp($c, 'none')) {
            return true;
        }
        if (80000 > \PHP_VERSION_ID) {
            return false;
        }
        if (\is_int($c) || 'long' === $c || 'entity' === $c) {
            return false;
        }

        throw new \ValueError('Argument #1 ($substitute_character) must be "none", "long", "entity" or a valid codepoint');
    }

    /**
     * Returns the part of $s starting at character $start and spanning $length characters.
     *
     * @param string      $s        the source string
     * @param int         $start    start position; negative values count from the end
     * @param int|null    $length   maximum length; null means "to the end", negative
     *                              values drop that many characters from the end
     * @param string|null $encoding encoding of $s, normalized through getEncoding()
     *
     * @return string the extracted part, '' when nothing can be extracted
     */
    public static function mb_substr($s, $start, $length = null, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);
        if ('CP850' === $encoding || 'ASCII' === $encoding) {
            return (string) substr($s, $start, null === $length ? 2147483647 : $length);
        }

        if ($start < 0) {
            $start = \iconv_strlen($s, $encoding) + $start;
            if ($start < 0) {
                $start = 0;
            }
        }

        if (null === $length) {
            $length = 2147483647;
        } elseif ($length < 0) {
            $length = \iconv_strlen($s, $encoding) + $length - $start;
            if ($length < 0) {
                return '';
            }
        }

        return (string) \iconv_substr($s, $start, $length, $encoding);
    }

    /**
     * Case-insensitive mb_strpos(): both strings are case-folded before the search.
     */
    public static function mb_stripos($haystack, $needle, $offset = 0, $encoding = null)
    {
        $haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
        $needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);

        return self::mb_strpos($haystack, $needle, $offset, $encoding);
    }

    /**
     * Case-insensitive mb_strstr(): locates $needle with mb_stripos() and lets
     * getSubpart() build the result from that position and $part.
     */
    public static function mb_stristr($haystack, $needle, $part = false, $encoding = null)
    {
        $pos = self::mb_stripos($haystack, $needle, 0, $encoding);

        return self::getSubpart($pos, $part, $haystack, $encoding);
    }

    /**
     * Locates the last occurrence of $needle in $haystack and lets getSubpart()
     * build the result from that position and $part.
     *
     * Outside of CP850/ASCII only the first character of $needle is searched for.
     */
    public static function mb_strrchr($haystack, $needle, $part = false, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);
        if ('CP850' === $encoding || 'ASCII' === $encoding) {
            $pos = strrpos($haystack, $needle);
        } else {
            $needle = self::mb_substr($needle, 0, 1, $encoding);
            $pos = \iconv_strrpos($haystack, $needle, $encoding);
        }

        return self::getSubpart($pos, $part, $haystack, $encoding);
    }

    /**
     * Case-insensitive mb_strrchr(): searches for the first character of $needle
     * with mb_strripos() and lets getSubpart() build the result.
     */
    public static function mb_strrichr($haystack, $needle, $part = false, $encoding = null)
    {
        $needle = self::mb_substr($needle, 0, 1, $encoding);
        // The third parameter of mb_strripos() is the offset; the encoding is the fourth.
        $pos = self::mb_strripos($haystack, $needle, 0, $encoding);

        return self::getSubpart($pos, $part, $haystack, $encoding);
    }

    /**
     * Case-insensitive mb_strrpos(): both strings are case-folded before the search.
     */
    public static function mb_strripos($haystack, $needle, $offset = 0, $encoding = null)
    {
        $haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
        $needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);

        return self::mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    /**
     * Finds the first occurrence of $needle in $haystack using byte-wise search.
     *
     * @param string      $haystack the string to search in
     * @param string      $needle   the string to search for
     * @param bool        $part     true to return the part before the match, false for the match and what follows
     * @param string|null $encoding accepted for signature compatibility, not used
     *
     * @return string|false the requested part, or false when $needle is not found
     */
    public static function mb_strstr($haystack, $needle, $part = false, $encoding = null)
    {
        $pos = strpos($haystack, $needle);
        if (false === $pos) {
            return false;
        }
        if ($part) {
            return substr($haystack, 0, $pos);
        }

        return substr($haystack, $pos);
    }

    /**
     * Returns the polyfill's settings.
     *
     * @param string $type 'all' or one of the setting names
     *
     * @return array|string|int|false the full array for 'all', a single value for a
     *                                known setting, false otherwise (isset() is used,
     *                                so a null-valued setting would also yield false)
     */
    public static function mb_get_info($type = 'all')
    {
        $info = [
            'internal_encoding' => self::$internalEncoding,
            'http_output' => 'pass',
            'http_output_conv_mimetypes' => '^(text/|application/xhtml\+xml)',
            'func_overload' => 0,
            'func_overload_list' => 'no overload',
            'mail_charset' => 'UTF-8',
            'mail_header_encoding' => 'BASE64',
            'mail_body_encoding' => 'BASE64',
            'illegal_chars' => 0,
            'encoding_translation' => 'Off',
            'language' => self::$language,
            'detect_order' => self::$encodingList,
            'substitute_character' => 'none',
            'strict_detection' => 'Off',
        ];

        if ('all' === $type) {
            return $info;
        }
        if (isset($info[$type])) {
            return $info[$type];
        }

        return false;
    }

    /**
     * @return false always; HTTP input detection is not supported
     */
    public static function mb_http_input($type = '')
    {
        return false;
    }

    /**
     * Gets or "sets" the HTTP output encoding; only 'pass' is supported.
     *
     * @return string|bool 'pass' when $encoding is null, otherwise whether $encoding is exactly 'pass'
     */
    public static function mb_http_output($encoding = null)
    {
        return null !== $encoding ? 'pass' === $encoding : 'pass';
    }

    /**
     * Returns the display width of $s: characters in the listed wide ranges count
     * for two columns, every remaining character for one.
     *
     * @param string      $s        the string to measure
     * @param string|null $encoding encoding of $s, normalized through getEncoding()
     *
     * @return int the computed width
     */
    public static function mb_strwidth($s, $encoding = null)
    {
        $encoding = self::getEncoding($encoding);

        if ('UTF-8' !== $encoding) {
            $s = \iconv($encoding, 'UTF-8//IGNORE', $s);
        }

        // Wide characters are stripped and counted; $wide receives the replacement count.
        $s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);

        return ($wide << 1) + \iconv_strlen($s, 'UTF-8');
    }

    /**
     * Counts the occurrences of $needle in $haystack using byte-wise substr_count();
     * $encoding is accepted for signature compatibility and not used.
     */
    public static function mb_substr_count($haystack, $needle, $encoding = null)
    {
        return substr_count($haystack, $needle);
    }

    /**
     * Output buffer callback that returns $contents unchanged.
     */
    public static function mb_output_handler($contents, $status)
    {
        return $contents;
    }

    public static function mb_chr($code, $encoding = null)
    {
        if (0x80 > $code %= 0x200000) {
            $s = \chr($code);
        } elseif (0x800 > $code) {
            $s = \chr(0xC0 | $code >> 6).\chr(0x80 | $code & 0x3F);
        } elseif (0x10000 > $code) {
            $s = \chr(0xE0 | $code >> 12).\chr(0x80 | $code >> 6 & 0x3F).\chr(0x80 | $code & 0x3F);
        } else {
            $s = \chr(0xF0 | $code >> 18).\chr(0x80 | $code >> 12 & 0x3F).\chr(0x80 | $code >> 6 & 0x3F).\chr(0x80 | $code & 0x3F);
        }

        if ('UTF-8' !== $encoding = 
self::getEncoding($encoding)) { 770*04fd306cSNickeau $s = mb_convert_encoding($s, $encoding, 'UTF-8'); 771*04fd306cSNickeau } 772*04fd306cSNickeau 773*04fd306cSNickeau return $s; 774*04fd306cSNickeau } 775*04fd306cSNickeau 776*04fd306cSNickeau public static function mb_ord($s, $encoding = null) 777*04fd306cSNickeau { 778*04fd306cSNickeau if ('UTF-8' !== $encoding = self::getEncoding($encoding)) { 779*04fd306cSNickeau $s = mb_convert_encoding($s, 'UTF-8', $encoding); 780*04fd306cSNickeau } 781*04fd306cSNickeau 782*04fd306cSNickeau if (1 === \strlen($s)) { 783*04fd306cSNickeau return \ord($s); 784*04fd306cSNickeau } 785*04fd306cSNickeau 786*04fd306cSNickeau $code = ($s = unpack('C*', substr($s, 0, 4))) ? $s[1] : 0; 787*04fd306cSNickeau if (0xF0 <= $code) { 788*04fd306cSNickeau return (($code - 0xF0) << 18) + (($s[2] - 0x80) << 12) + (($s[3] - 0x80) << 6) + $s[4] - 0x80; 789*04fd306cSNickeau } 790*04fd306cSNickeau if (0xE0 <= $code) { 791*04fd306cSNickeau return (($code - 0xE0) << 12) + (($s[2] - 0x80) << 6) + $s[3] - 0x80; 792*04fd306cSNickeau } 793*04fd306cSNickeau if (0xC0 <= $code) { 794*04fd306cSNickeau return (($code - 0xC0) << 6) + $s[2] - 0x80; 795*04fd306cSNickeau } 796*04fd306cSNickeau 797*04fd306cSNickeau return $code; 798*04fd306cSNickeau } 799*04fd306cSNickeau 800*04fd306cSNickeau private static function getSubpart($pos, $part, $haystack, $encoding) 801*04fd306cSNickeau { 802*04fd306cSNickeau if (false === $pos) { 803*04fd306cSNickeau return false; 804*04fd306cSNickeau } 805*04fd306cSNickeau if ($part) { 806*04fd306cSNickeau return self::mb_substr($haystack, 0, $pos, $encoding); 807*04fd306cSNickeau } 808*04fd306cSNickeau 809*04fd306cSNickeau return self::mb_substr($haystack, $pos, null, $encoding); 810*04fd306cSNickeau } 811*04fd306cSNickeau 812*04fd306cSNickeau private static function html_encoding_callback(array $m) 813*04fd306cSNickeau { 814*04fd306cSNickeau $i = 1; 815*04fd306cSNickeau $entities = ''; 816*04fd306cSNickeau $m = unpack('C*', 
htmlentities($m[0], \ENT_COMPAT, 'UTF-8')); 817*04fd306cSNickeau 818*04fd306cSNickeau while (isset($m[$i])) { 819*04fd306cSNickeau if (0x80 > $m[$i]) { 820*04fd306cSNickeau $entities .= \chr($m[$i++]); 821*04fd306cSNickeau continue; 822*04fd306cSNickeau } 823*04fd306cSNickeau if (0xF0 <= $m[$i]) { 824*04fd306cSNickeau $c = (($m[$i++] - 0xF0) << 18) + (($m[$i++] - 0x80) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80; 825*04fd306cSNickeau } elseif (0xE0 <= $m[$i]) { 826*04fd306cSNickeau $c = (($m[$i++] - 0xE0) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80; 827*04fd306cSNickeau } else { 828*04fd306cSNickeau $c = (($m[$i++] - 0xC0) << 6) + $m[$i++] - 0x80; 829*04fd306cSNickeau } 830*04fd306cSNickeau 831*04fd306cSNickeau $entities .= '&#'.$c.';'; 832*04fd306cSNickeau } 833*04fd306cSNickeau 834*04fd306cSNickeau return $entities; 835*04fd306cSNickeau } 836*04fd306cSNickeau 837*04fd306cSNickeau private static function title_case(array $s) 838*04fd306cSNickeau { 839*04fd306cSNickeau return self::mb_convert_case($s[1], \MB_CASE_UPPER, 'UTF-8').self::mb_convert_case($s[2], \MB_CASE_LOWER, 'UTF-8'); 840*04fd306cSNickeau } 841*04fd306cSNickeau 842*04fd306cSNickeau private static function getData($file) 843*04fd306cSNickeau { 844*04fd306cSNickeau if (file_exists($file = __DIR__.'/Resources/unidata/'.$file.'.php')) { 845*04fd306cSNickeau return require $file; 846*04fd306cSNickeau } 847*04fd306cSNickeau 848*04fd306cSNickeau return false; 849*04fd306cSNickeau } 850*04fd306cSNickeau 851*04fd306cSNickeau private static function getEncoding($encoding) 852*04fd306cSNickeau { 853*04fd306cSNickeau if (null === $encoding) { 854*04fd306cSNickeau return self::$internalEncoding; 855*04fd306cSNickeau } 856*04fd306cSNickeau 857*04fd306cSNickeau if ('UTF-8' === $encoding) { 858*04fd306cSNickeau return 'UTF-8'; 859*04fd306cSNickeau } 860*04fd306cSNickeau 861*04fd306cSNickeau $encoding = strtoupper($encoding); 862*04fd306cSNickeau 863*04fd306cSNickeau if ('8BIT' === $encoding || 
'BINARY' === $encoding) { 864*04fd306cSNickeau return 'CP850'; 865*04fd306cSNickeau } 866*04fd306cSNickeau 867*04fd306cSNickeau if ('UTF8' === $encoding) { 868*04fd306cSNickeau return 'UTF-8'; 869*04fd306cSNickeau } 870*04fd306cSNickeau 871*04fd306cSNickeau return $encoding; 872*04fd306cSNickeau } 873*04fd306cSNickeau} 874