<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * Check for mb_string support.
 *
 * UTF8_MBSTRING is 1 when the mbstring extension is usable and has not been
 * disabled by defining UTF8_NOMBSTRING, 0 otherwise.
 */
if(!defined('UTF8_MBSTRING')){
    $utf8_has_mb = function_exists('mb_substr') && !defined('UTF8_NOMBSTRING');
    define('UTF8_MBSTRING', $utf8_has_mb ? 1 : 0);
    unset($utf8_has_mb);
}

/**
 * Check if PREG was compiled with UTF-8 support
 *
 * Without this many of the functions below will not work, so this is a minimal requirement
 */
if(!defined('UTF8_PREGSUPPORT')){
    define('UTF8_PREGSUPPORT', (bool) @preg_match('/^.$/u', 'ñ'));
}

/**
 * Check if PREG was compiled with Unicode Property support
 *
 * This is not required for the functions below, but might be needed in a UTF-8 aware application
 */
if(!defined('UTF8_PROPERTYSUPPORT')){
    define('UTF8_PROPERTYSUPPORT', (bool) @preg_match('/^\pL$/u', 'ñ'));
}


if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
     *
     * @param string $str
     * @return bool true when no byte outside 0x00-0x7F was found
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string: every byte >= 0x80 is dropped.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $str
     * @return string
     */
    function utf8_strip($str){
        $ascii = '';
        $total = strlen($str);
        for($pos = 0; $pos < $total; $pos++){
            $byte = $str[$pos];
            if(ord($byte) < 128){
                $ascii .= $byte;
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * Only the lead byte / continuation byte structure is checked. Lead bytes
     * announcing 5 and 6 byte sequences are accepted, and neither overlong
     * forms nor surrogates are rejected.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://php.net/manual/en/function.utf8-encode.php
     *
     * @param string $Str
     * @return bool
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue; # 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
            else return false; # Does not match any model

            for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_basename')){
    /**
     * A locale independent basename() implementation
     *
     * works around a bug in PHP's basename() implementation
     *
     * @see basename()
     * @link   https://bugs.php.net/bug.php?id=37738
     *
     * @param string $path     A path
     * @param string $suffix   If the name component ends in suffix this will also be cut off
     * @return string
     */
    function utf8_basename($path, $suffix=''){
        // leading/trailing separators never belong to the name component
        $path = trim($path,'\\/');
        $rpos = max(strrpos($path, '/'), strrpos($path, '\\'));
        // after the trim a separator can not sit at index 0, so a falsy
        // $rpos always means "no separator left"
        if($rpos) $path = substr($path, $rpos+1);

        $suflen = strlen($suffix);
        // strict comparison: a loose == treats numeric looking strings such
        // as "10" and "1e1" as equal and would cut off a non-matching suffix
        if($suflen && (substr($path, -$suflen) === (string) $suffix)){
            $path = substr($path, 0, -$suflen);
        }

        return $path;
    }
}

if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * mb_strlen() is used when the mbstring extension is enabled. Otherwise
     * the function falls back to utf8_decode() (which converts characters
     * that are not in ISO-8859-1 to '?', which is alright for the purpose of
     * counting), then to iconv_strlen() and finally to a plain byte count.
     * utf8_decode() is no longer tried first because it is deprecated as of
     * PHP 8.2.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @see    utf8_decode()
     *
     * @param string $string
     * @return int
     */
    function utf8_strlen($string) {
        if (UTF8_MBSTRING) {
            return mb_strlen($string, 'UTF-8');
        }
        if (function_exists('utf8_decode')) {
            return strlen(utf8_decode($string));
        }
        if (function_exists('iconv_strlen')) {
            return iconv_strlen($string, 'UTF-8');
        }
        return strlen($string);
    }
}

if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     *
     * @param string $str
     * @param int $offset number of UTF-8 characters offset (from left)
     * @param int $length (optional) length in UTF-8 characters from offset
     * @return string
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            if( $length === null ){
                return mb_substr($str, $offset);
            }
            return mb_substr($str, $offset, $length);
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
        $offset = (int)$offset;
        if (!is_null($length)) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = utf8_strlen($str);        // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if (is_null($length)) {
            $length_pattern = '(.*)$';                  // the rest of the string
        } else {

            if (!isset($strlen)) $strlen = utf8_strlen($str);    // see notes
            if ($offset > $strlen) return '';           // another trivial case

            if ($length > 0) {

                $length = min($strlen-$offset, $length);  // reduce any length that would go passed the end of the string

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {

                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}

if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     *
     * @param string $string      input string
     * @param string $replacement the replacement
     * @param int    $start       the replacing will begin at the start'th offset into string.
     * @param int    $length      If given and is positive, it represents the length of the portion of string which is
     *                            to be replaced. If length is zero then this function will have the effect of inserting
     *                            replacement into string at the given start offset.
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        // head (only when there is one) + replacement + everything after the replaced span
        $head = ($start > 0) ? utf8_substr($string, 0, $start) : '';
        $tail = utf8_substr($string, $start+$length);
        return $head . $replacement . $tail;
    }
}

if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * Without a charlist the call is passed on to PHP's ltrim(). With a
     * charlist every character in it is taken literally (no ".." ranges).
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     *
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if($charlist == '') return ltrim($str);

        // quote charlist for use in a character class; preg_quote() also
        // covers '^', which would otherwise negate or break the class
        $charlist = preg_quote($charlist, '/');

        return preg_replace('/^['.$charlist.']+/u','',$str);
    }
}

if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * Without a charlist the call is passed on to PHP's rtrim(). With a
     * charlist every character in it is taken literally (no ".." ranges).
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     *
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if($charlist == '') return rtrim($str);

        // quote charlist for use in a character class; preg_quote() also
        // covers '^', which would otherwise negate or break the class
        $charlist = preg_quote($charlist, '/');

        return preg_replace('/['.$charlist.']+$/u','',$str);
    }
}

if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     *
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if($charlist == '') return trim($str);

        $str = utf8_rtrim($str,$charlist);
        return utf8_ltrim($str,$charlist);
    }
}

if(!function_exists('utf8_strtolower')){
    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available; the result is additionally run
     * through Normalizer::normalize() when the Normalizer class is already
     * loaded. Without mb_string the global $UTF8_UPPER_TO_LOWER table is used.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     *
     * @param string $string
     * @return string
     */
    function utf8_strtolower($string){
        if(UTF8_MBSTRING) {
            $lower = mb_strtolower($string,'utf-8');
            // second argument false: do not trigger autoloading
            if (class_exists("Normalizer", false)) {
                return Normalizer::normalize($lower);
            }
            return $lower;
        }
        global $UTF8_UPPER_TO_LOWER;
        return strtr($string,$UTF8_UPPER_TO_LOWER);
    }
}

if(!function_exists('utf8_strtoupper')){
    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available, the global $UTF8_LOWER_TO_UPPER
     * table otherwise.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtolower()
     *
     * @param string $string
     * @return string
     */
    function utf8_strtoupper($string){
        if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');

        global $UTF8_LOWER_TO_UPPER;
        return strtr($string,$UTF8_LOWER_TO_UPPER);
    }
}

if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     *
     * @param string $str
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        switch ( utf8_strlen($str) ) {
            case 0:
                return '';
            case 1:
                return utf8_strtoupper($str);
            default:
                // split into first character and remainder
                preg_match('/^(.{1})(.*)$/us', $str, $matches);
                return utf8_strtoupper($matches[1]).$matches[2];
        }
    }
}

if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @see http://php.net/ucwords
     *
     * @param string $str
     * @return string with first char of each word uppercase
     */
    function utf8_ucwords($str) {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
    }

    /**
     * Callback function for preg_replace_callback call in utf8_ucwords
     * You don't need to call this yourself
     *
     * @author Harry Fuecks
     * @see utf8_ucwords
     * @see utf8_strtoupper
     *
     * @param  array $matches matches corresponding to a single word:
     *                        [0] whole match, [2] leading whitespace, [3] first character
     * @return string with first char of the word in uppercase
     */
    function utf8_ucwords_callback($matches) {
        $leadingws = $matches[2];
        $first     = $matches[3];

        // Slice by the byte lengths of the captured groups instead of using
        // ltrim(): ltrim() does not strip \x0c (whitespace for the pattern)
        // and does strip \0 (a word character for the pattern), which
        // corrupted words around those bytes.
        $rest = (string) substr($matches[0], strlen($leadingws) + strlen($first));

        return $leadingws . utf8_strtoupper($first) . $rest;
    }
}

if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters. Default is to deaccent both cases ($case = 0)
     *
     * The replacement tables are read from the globals $UTF8_LOWER_ACCENTS
     * and $UTF8_UPPER_ACCENTS.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $string
     * @param int $case
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        if($case <= 0){
            global $UTF8_LOWER_ACCENTS;
            $string = strtr($string,$UTF8_LOWER_ACCENTS);
        }
        if($case >= 0){
            global $UTF8_UPPER_ACCENTS;
            $string = strtr($string,$UTF8_UPPER_ACCENTS);
        }
        return $string;
    }
}

if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * The replacement table is read from the global $UTF8_ROMANIZATION.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $string
     * @return string
     */
    function utf8_romanize($string){
        if(utf8_isASCII($string)) return $string; //nothing to do

        global $UTF8_ROMANIZATION;
        return strtr($string,$UTF8_ROMANIZATION);
    }
}

if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the array of
     * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param  string $string     The UTF8 string to strip of special chars
     * @param  string $repl       Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class,
     *                            inserted unescaped)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS2;

        // quoted once per request and cached
        static $specials = null;
        if(is_null($specials)){
            #$specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/');
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
    }
}

if(!function_exists('utf8_strpos')){
    /**
     * This is an Unicode aware replacement for strpos
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     *
     * @param  string  $haystack
     * @param  string  $needle
     * @param  integer $offset   offset in characters
     * @return integer|false character position of the needle, false when not found
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        $comp = 0;        // bytes to add to the character offset for the next byte search
        $length = null;   // character position of the last hit

        while (is_null($length) || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false)
                return false;

            // translate the byte position into a character position
            $length = utf8_strlen(substr($haystack, 0, $pos));

            // hit lies before the requested character offset: search again further right
            if ($length < $offset)
                $comp = $pos - $length;
        }

        return $length;
    }
}

if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * ASCII is passed through, code points below 0x100 become decimal
     * entities, everything else hexadecimal entities.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://php.net/manual/en/function.utf8-decode.php
     *
     * @param string $str
     * @return string
     */
    function utf8_tohtml ($str) {
        $ret = '';
        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp < 0x80)
                $ret .= chr($cp);
            elseif ($cp < 0x100)
                $ret .= "&#$cp;";
            else
                $ret .= '&#x'.dechex($cp).';';
        }
        return $ret;
    }
}

if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * Convert any &#..; entity to a codepoint,
     * The entities flag defaults to only decoding numeric entities.
     * Pass HTML_ENTITIES and named entities, including & < etc.
     * are handled as well. Avoids the problem that would occur if you
     * had to decode "&#38;&amp;#38;"
     *
     *     unhtmlspecialchars(utf8_unhtml($s)) -> "&&"
     *     utf8_unhtml(unhtmlspecialchars($s)) -> "&&#38;"
     *     what it should be                   -> "&&#38;"
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     *
     * @param  string  $str      UTF-8 encoded string
     * @param  boolean $entities Flag controlling decoding of named entities.
     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
     */
    function utf8_unhtml($str, $entities=null) {
        static $decoder = null;

        if (is_null($entities)) {
            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
                                         'utf8_decode_numeric', $str);
        }

        // the entity table is only built when named entities are requested
        if (is_null($decoder)) {
            $decoder = new utf8_entity_decoder();
        }
        return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                                     array($decoder, 'decode'), $str);
    }
}

if(!function_exists('utf8_decode_numeric')){
    /**
     * Decodes numeric HTML entities to their correct UTF-8 characters
     *
     * Entities whose payload is not a valid decimal (or, with an x prefix,
     * hexadecimal) number are returned unchanged instead of being turned
     * into a NUL byte.
     *
     * @param array $ent match array of a numeric entity:
     *                   [0] whole match, [2] 'x'/'X' or empty, [3] the digits
     * @return string|false
     */
    function utf8_decode_numeric($ent) {
        $digits = $ent[3];

        if ($ent[2] === 'X' || $ent[2] === 'x') {
            if (!preg_match('/^[0-9A-Fa-f]+$/D', $digits)) return $ent[0];
            $cp = hexdec($digits);
        } else {
            if (!preg_match('/^[0-9]+$/D', $digits)) return $ent[0];
            $cp = intval($digits);
        }

        return unicode_to_utf8(array($cp));
    }
}

if(!class_exists('utf8_entity_decoder')){
    /**
     * Encapsulate HTML entity decoding tables
     */
    class utf8_entity_decoder {
        /** @var array map of entity (e.g. "&eacute;") to its UTF-8 character */
        protected $table;

        /**
         * Initializes the decoding tables
         *
         * The translation table is requested in UTF-8 explicitly, so its
         * characters can be used as they are. Converting them through ord()
         * only works for single byte encodings and mangled every non-ASCII
         * entity once PHP started to default to UTF-8 here.
         */
        function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
            $this->table = array_flip($table);
        }

        /**
         * Wrapper around unicode_to_utf8()
         *
         * Kept for backward compatibility; only meaningful for single byte
         * (ISO-8859-1) input since just the first byte is looked at.
         *
         * @param string $c
         * @return string|false
         */
        function makeutf8($c) {
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * Decodes any HTML entity to it's correct UTF-8 char equivalent
         *
         * @param array $ent match array of an entity: [0] whole match, [1] '#' for numeric entities
         * @return string|false the decoded character, or the entity itself when it is unknown
         */
        function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            }
            if (array_key_exists($ent[0],$this->table)) {
                return $this->table[$ent[0]];
            }
            return $ent[0];
        }
    }
}

if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING
     *
     * Without $strict, illegal lead bytes and broken continuations are
     * skipped silently and illegal but complete sequences are still written
     * to the output.
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    unicode_to_utf8
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     *
     * @param  string  $str UTF-8 encoded string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0;     // cached expected number of octets after the current octet
                         // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0;     // cached Unicode character
        $mBytes = 1;     // cached expected number of octets in the current sequence

        $out = array();

        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {

            $in = ord($str[$i]);

            if ( $mState == 0) {

                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in))) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & ($in))) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & ($in))) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & ($in))) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & ($in))) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & ($in))) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Illegal sequence identifier '.
                                'in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );
                    return false;

                }

            } else {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in))) {

                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState) {

                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                        'utf8_to_unicode: Illegal sequence or codepoint '.
                                            'in UTF-8 at byte '.$i,
                                        E_USER_WARNING
                                    );

                                return false;
                            }

                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Incomplete multi-octet '.
                            '   sequence in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );

                    return false;
                }
            }
        }
        return $out;
    }
}

if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING.
     * Without $strict such values are skipped.
     *
     * The result is built by plain string concatenation; the earlier output
     * buffering left a buffer open whenever strict mode returned early.
     *
     * @param  array $arr of unicode code points representing a string
     * @param  boolean $strict Check for invalid sequences?
     * @return string|false UTF-8 string or false if array contains invalid code points
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    utf8_to_unicode
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';

        $utf8 = '';

        foreach ($arr as $k => $cp) {

            if ($cp < 0 || $cp > 0x10ffff) {
                # outside the Unicode range (negative values included)

                if($strict){
                    trigger_error(
                            'unicode_to_utf8: Codepoint out of Unicode range '.
                                'at index: '.$k.', value: '.$cp,
                            E_USER_WARNING
                        );
                    return false;
                }

            } else if ($cp <= 0x007f) {
                # ASCII range (including control chars)

                $utf8 .= chr($cp);

            } else if ($cp <= 0x07ff) {
                # 2 byte sequence

                $utf8 .= chr(0xc0 | ($cp >> 6));
                $utf8 .= chr(0x80 | ($cp & 0x003f));

            } else if ($cp == 0xFEFF) {
                # Byte order mark (skip)

                // nop -- zap the BOM

            } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {
                # Test for illegal surrogates

                // found a surrogate
                if($strict){
                    trigger_error(
                            'unicode_to_utf8: Illegal surrogate '.
                                'at index: '.$k.', value: '.$cp,
                            E_USER_WARNING
                        );
                    return false;
                }

            } else if ($cp <= 0xffff) {
                # 3 byte sequence

                $utf8 .= chr(0xe0 | ($cp >> 12));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x003f));
                $utf8 .= chr(0x80 | ($cp & 0x003f));

            } else {
                # 4 byte sequence

                $utf8 .= chr(0xf0 | ($cp >> 18));
                $utf8 .= chr(0x80 | (($cp >> 12) & 0x3f));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x3f));
                $utf8 .= chr(0x80 | ($cp & 0x3f));
            }
        }

        return $utf8;
    }
}

if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_string when available. The fallback encodes code points above
     * 0xFFFF as surrogate pairs.
     *
     * @param string $str
     * @param bool $bom prepend the big endian byte order mark?
     * @return string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if ($cp > 0xFFFF) {
                // astral plane: split into high and low surrogate
                $cp -= 0x10000;
                $out .= pack('n', 0xD800 | ($cp >> 10));
                $out .= pack('n', 0xDC00 | ($cp & 0x3FF));
            } else {
                $out .= pack('n',$cp);
            }
        }
        return $out;
    }
}

if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * Surrogate pairs are combined into their astral code point; unpaired
     * surrogates are dropped by unicode_to_utf8().
     *
     * @param string $str UTF-16BE encoded string
     * @return false|string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if (!is_array($units)) return unicode_to_utf8($units);

        $units = array_values($units);
        $count = count($units);
        $uni   = array();

        for ($i = 0; $i < $count; $i++) {
            $unit = $units[$i];
            $isHigh = ($unit >= 0xD800 && $unit <= 0xDBFF);
            if ($isHigh && ($i + 1 < $count)
                && $units[$i+1] >= 0xDC00 && $units[$i+1] <= 0xDFFF) {
                // high + low surrogate form one code point above 0xFFFF
                $uni[] = 0x10000 + (($unit - 0xD800) << 10) + ($units[$i+1] - 0xDC00);
                $i++;
            } else {
                $uni[] = $unit;
            }
        }

        return unicode_to_utf8($uni);
    }
}

if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see http://www.w3.org/International/questions/qa-forms-utf-8
     *
     * @param string $str to search
     * @param string $replace to replace bad bytes with (defaults to '', i.e. bad bytes
     *                        are removed) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
         '([\x00-\x7F]'.                          # ASCII (including control chars)
         '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
         '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
         '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
         '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
         '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
         '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
         '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
         '|(.{1}))';                              # invalid byte

        // Walk the string with an anchored match at a moving byte offset
        // instead of re-copying the remainder after every character.
        $pattern = '/'.$UTF8_BAD.'/AS';
        $total   = strlen($str);
        $pos     = 0;
        $result  = '';

        while ($pos < $total && preg_match($pattern, $str, $matches, 0, $pos)) {
            // group 2 only takes part in the match for an invalid byte
            if ( !isset($matches[2])) {
                $result .= $matches[0];
            } else {
                $result .= $replace;
            }
            $pos += strlen($matches[0]);
        }

        return $result;
    }
}

if(!function_exists('utf8_correctIdx')){
    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * @param string $str   utf8 character string
     * @param int    $i     byte index into $str
     * @param bool   $next  direction to search for boundary,
     *                           false = up (current character)
     *                           true = down (next character)
     *
     * @return int            byte index into $str now pointing to a utf8 character boundary
     *
     * @author       chris smith <chris@jalakai.co.uk>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $limit = strlen($str);
        if ($i>=$limit) return $limit;

        // step over continuation bytes (10xxxxxx) in the requested direction
        if ($next) {
            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
        }

        return $i;
    }
}

// only needed if no mb_string available
if(!UTF8_MBSTRING){
    /**
     * UTF-8 Case lookup table
     *
     * This lookup table maps lower case letters to their corresponding
     * upper case letter in UTF-8
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_LOWER_TO_UPPER;
    if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
            "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q",
            "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G",
            "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ",
"ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ", 1082 "ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 1083 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 1084 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 1085 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 1086 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 1087 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 1088 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 1089 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 1090 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 1091 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 1092 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 1093 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 1094 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 1095 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 1096 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 1097 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 1098 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 1099 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 1100 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 1101 
"ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 1102 "ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 1103 "ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 1104 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 1105 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 1106 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 1107 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 1108 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 1109 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1110 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1111 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1112 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1113 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1114 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1115 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1116 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1117 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1118 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1119 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1120 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1121 
"ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1122 "ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1123 "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1124 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1125 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1126 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1127 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1128 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1129 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1130 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1131 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1132 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1133 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1134 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1135 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1136 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1137 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1138 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1139 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1140 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1141 
"ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1142 "á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 1143 "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 1144 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 1145 ); 1146 1147 /** 1148 * UTF-8 Case lookup table 1149 * 1150 * This lookuptable defines the lower case letters to their corresponding 1151 * upper case letter in UTF-8 1152 * 1153 * @author Andreas Gohr <andi@splitbrain.org> 1154 */ 1155 global $UTF8_UPPER_TO_LOWER; 1156 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1157 "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", 1158 "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", 1159 "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", 1160 "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", 1161 "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", 1162 "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", 1163 "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", 1164 "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", 1165 "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", 1166 "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", 1167 "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", 1168 "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ", 1169 "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ", 1170 
"Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố", 1171 "Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ", 1172 "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ", 1173 "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ", 1174 "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ", 1175 "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ", 1176 "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ", 1177 "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ", 1178 "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ", 1179 "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ", 1180 "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ", 1181 "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս", 1182 "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ", 1183 "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ", 1184 "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ", 1185 "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ", 1186 "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ", 1187 "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ", 1188 "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ", 1189 "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ", 1190 
"Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ", 1191 "Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ", 1192 "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ", 1193 "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ", 1194 "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ", 1195 "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п", 1196 "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е", 1197 "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ", 1198 "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ", 1199 "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ", 1200 "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π", 1201 "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ", 1202 "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ", 1203 "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ", 1204 "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ", 1205 "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș", 1206 "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ", 1207 "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ", 1208 "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ", 1209 "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ", 1210 
"Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ", 1211 "Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž", 1212 "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ", 1213 "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ", 1214 "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ", 1215 "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī", 1216 "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė", 1217 "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă", 1218 "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö", 1219 "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì", 1220 "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â", 1221 "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1222 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1223 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1224 ); 1225}; // end of case lookup tables 1226 1227/** 1228 * UTF-8 lookup table for lower case accented letters 1229 * 1230 * This lookuptable defines replacements for accented characters from the ASCII-7 1231 * range. This are lower case letters only. 
/**
 * UTF-8 lookup table for lower case accented letters
 *
 * Maps accented (and a few other non-ASCII) lower case letters to a
 * replacement made of ASCII-7 characters only. A replacement may be longer
 * than one character (for example 'ß' becomes 'ss' and 'ä' becomes 'ae').
 *
 * The table is only assigned while the global is still empty, so a table
 * that was set up before this file is loaded is left alone.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_LOWER_ACCENTS;
if(empty($UTF8_LOWER_ACCENTS)){
    $UTF8_LOWER_ACCENTS = array(
        'à'=>'a','ô'=>'o','ď'=>'d','ḟ'=>'f','ë'=>'e','š'=>'s','ơ'=>'o',
        'ß'=>'ss','ă'=>'a','ř'=>'r','ț'=>'t','ň'=>'n','ā'=>'a','ķ'=>'k',
        'ŝ'=>'s','ỳ'=>'y','ņ'=>'n','ĺ'=>'l','ħ'=>'h','ṗ'=>'p','ó'=>'o',
        'ú'=>'u','ě'=>'e','é'=>'e','ç'=>'c','ẁ'=>'w','ċ'=>'c','õ'=>'o',
        'ṡ'=>'s','ø'=>'o','ģ'=>'g','ŧ'=>'t','ș'=>'s','ė'=>'e','ĉ'=>'c',
        'ś'=>'s','î'=>'i','ű'=>'u','ć'=>'c','ę'=>'e','ŵ'=>'w','ṫ'=>'t',
        'ū'=>'u','č'=>'c','ö'=>'oe','è'=>'e','ŷ'=>'y','ą'=>'a','ł'=>'l',
        'ų'=>'u','ů'=>'u','ş'=>'s','ğ'=>'g','ļ'=>'l','ƒ'=>'f','ž'=>'z',
        'ẃ'=>'w','ḃ'=>'b','å'=>'a','ì'=>'i','ï'=>'i','ḋ'=>'d','ť'=>'t',
        'ŗ'=>'r','ä'=>'ae','í'=>'i','ŕ'=>'r','ê'=>'e','ü'=>'ue','ò'=>'o',
        'ē'=>'e','ñ'=>'n','ń'=>'n','ĥ'=>'h','ĝ'=>'g','đ'=>'d','ĵ'=>'j',
        'ÿ'=>'y','ũ'=>'u','ŭ'=>'u','ư'=>'u','ţ'=>'t','ý'=>'y','ő'=>'o',
        'â'=>'a','ľ'=>'l','ẅ'=>'w','ż'=>'z','ī'=>'i','ã'=>'a','ġ'=>'g',
        'ṁ'=>'m','ō'=>'o','ĩ'=>'i','ù'=>'u','į'=>'i','ź'=>'z','á'=>'a',
        'û'=>'u','þ'=>'th','ð'=>'dh','æ'=>'ae','µ'=>'u','ĕ'=>'e',
    );
}
/**
 * UTF-8 lookup table for upper case accented letters
 *
 * Maps accented (and a few other non-ASCII) upper case letters to a
 * replacement made of ASCII-7 characters only. Multi character replacements
 * are written with a leading capital only (for example 'Ä' becomes 'Ae').
 *
 * The table is only assigned while the global is still empty, so a table
 * that was set up before this file is loaded is left alone.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_UPPER_ACCENTS;
if(empty($UTF8_UPPER_ACCENTS)){
    $UTF8_UPPER_ACCENTS = array(
        'À'=>'A','Ô'=>'O','Ď'=>'D','Ḟ'=>'F','Ë'=>'E','Š'=>'S','Ơ'=>'O',
        'Ă'=>'A','Ř'=>'R','Ț'=>'T','Ň'=>'N','Ā'=>'A','Ķ'=>'K',
        'Ŝ'=>'S','Ỳ'=>'Y','Ņ'=>'N','Ĺ'=>'L','Ħ'=>'H','Ṗ'=>'P','Ó'=>'O',
        'Ú'=>'U','Ě'=>'E','É'=>'E','Ç'=>'C','Ẁ'=>'W','Ċ'=>'C','Õ'=>'O',
        'Ṡ'=>'S','Ø'=>'O','Ģ'=>'G','Ŧ'=>'T','Ș'=>'S','Ė'=>'E','Ĉ'=>'C',
        'Ś'=>'S','Î'=>'I','Ű'=>'U','Ć'=>'C','Ę'=>'E','Ŵ'=>'W','Ṫ'=>'T',
        'Ū'=>'U','Č'=>'C','Ö'=>'Oe','È'=>'E','Ŷ'=>'Y','Ą'=>'A','Ł'=>'L',
        'Ų'=>'U','Ů'=>'U','Ş'=>'S','Ğ'=>'G','Ļ'=>'L','Ƒ'=>'F','Ž'=>'Z',
        'Ẃ'=>'W','Ḃ'=>'B','Å'=>'A','Ì'=>'I','Ï'=>'I','Ḋ'=>'D','Ť'=>'T',
        'Ŗ'=>'R','Ä'=>'Ae','Í'=>'I','Ŕ'=>'R','Ê'=>'E','Ü'=>'Ue','Ò'=>'O',
        'Ē'=>'E','Ñ'=>'N','Ń'=>'N','Ĥ'=>'H','Ĝ'=>'G','Đ'=>'D','Ĵ'=>'J',
        'Ÿ'=>'Y','Ũ'=>'U','Ŭ'=>'U','Ư'=>'U','Ţ'=>'T','Ý'=>'Y','Ő'=>'O',
        'Â'=>'A','Ľ'=>'L','Ẅ'=>'W','Ż'=>'Z','Ī'=>'I','Ã'=>'A','Ġ'=>'G',
        'Ṁ'=>'M','Ō'=>'O','Ĩ'=>'I','Ù'=>'U','Į'=>'I','Ź'=>'Z','Á'=>'A',
        'Û'=>'U','Þ'=>'Th','Ð'=>'Dh','Æ'=>'Ae','Ĕ'=>'E',
    );
}
/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets, given as Unicode code points. It is
 * not a complete list of non-alphanumeric characters in UTF-8. It's not
 * perfect but should match most cases of special chars.
 *
 * The control chars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
 *
 * The table is only assigned while the global is still empty.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_stripspecials()
 */
global $UTF8_SPECIAL_CHARS;
if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array(
    0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
    0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029,         0x002b, 0x002c,
            0x002f,         0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
    0x005c, 0x005d, 0x005e,         0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
    0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
    0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
    0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
    0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
    0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
    0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
    0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
    0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
    0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
    0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
    0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
    0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
    0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
    0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
    0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
    0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
    0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
    0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
    0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
    0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
    0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
    0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
    0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
    0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
    0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
    0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
    0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
    0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
    0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
    0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
    0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
    0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
    0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
    0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
    0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
    0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
    0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
    0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
    0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
    0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
    0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
    0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c,
    0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017,
    0x3018, 0x3019, 0x301a, 0x301b, 0x3036,
    0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
    0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
    0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
    0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
    0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
    // 0xff09 is listed exactly once (a repeated entry adds nothing to a set of chars)
    0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09,
    0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c,
    0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b,
    0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65,
    0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea,
    0xffeb, 0xffec, 0xffed, 0xffee,
    0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703,
    0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b,
    0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713,
    0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b,
    // Special spaces, as code points rather than UTF-8 byte sequences
    // (the bytes E2 80 87 etc. read as one number are above U+10FFFF):
    // U+2007 FIGURE SPACE, U+202F NARROW NO-BREAK SPACE, U+2060 WORD JOINER,
    // U+FEFF ZERO WIDTH NO-BREAK SPACE. U+00A0 is already listed above.
    0x2007, 0x202f, 0x2060, 0xfeff,
);
1377 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1378 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1379 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1380 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1381 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1382 '➷➸➹➺➻➼➽➾'. 1383 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1384 '�'. 1385 '�ﹼﹽ'. 1386 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1387 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1388 ''. 1389 ' '; 1390 1391/** 1392 * Romanization lookup table 1393 * 1394 * This lookup tables provides a way to transform strings written in a language 1395 * different from the ones based upon latin letters into plain ASCII. 1396 * 1397 * Please note: this is not a scientific transliteration table. It only works 1398 * oneway from nonlatin to ASCII and it works by simple character replacement 1399 * only. Specialities of each language are not supported. 1400 * 1401 * @author Andreas Gohr <andi@splitbrain.org> 1402 * @author Vitaly Blokhin <vitinfo@vitn.com> 1403 * @link http://www.uconv.com/translit.htm 1404 * @author Bisqwit <bisqwit@iki.fi> 1405 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1406 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1407 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1408 * @link http://www.btranslations.com/resources/romanization/korean.asp 1409 * @author Arthit Suriyawongkul <arthit@gmail.com> 1410 * @author Denis Scheither <amorphis@uni-bremen.de> 1411 * @author Eivind Morland <eivind.morland@gmail.com> 1412 */ 1413global $UTF8_ROMANIZATION; 1414if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1415 // scandinavian - differs from what we do in deaccent 1416 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1417 1418 //russian cyrillic 1419 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1420 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1421 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1422 
'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1423 'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1424 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1425 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1426 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1427 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1428 // Ukrainian cyrillic 1429 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1430 // Georgian 1431 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1432 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1433 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1434 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1435 'ჰ'=>'xh', 1436 //Sanskrit 1437 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1438 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1439 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1440 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1441 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1442 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1443 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1444 //Sanskrit diacritics 1445 'Ā'=>'A','Ī'=>'I','Ū'=>'U','Ṛ'=>'R','Ṝ'=>'R','Ṅ'=>'N','Ñ'=>'N','Ṭ'=>'T', 1446 'Ḍ'=>'D','Ṇ'=>'N','Ś'=>'S','Ṣ'=>'S','Ṁ'=>'M','Ṃ'=>'M','Ḥ'=>'H','Ḷ'=>'L','Ḹ'=>'L', 1447 'ā'=>'a','ī'=>'i','ū'=>'u','ṛ'=>'r','ṝ'=>'r','ṅ'=>'n','ñ'=>'n','ṭ'=>'t', 1448 'ḍ'=>'d','ṇ'=>'n','ś'=>'s','ṣ'=>'s','ṁ'=>'m','ṃ'=>'m','ḥ'=>'h','ḷ'=>'l','ḹ'=>'l', 1449 //Hebrew 1450 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1451 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1452 
'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1453 'ש'=>'sh','ת'=>'t', 1454 //Arabic 1455 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1456 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1457 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1458 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1459 1460 // Japanese characters (last update: 2008-05-09) 1461 1462 // Japanese hiragana 1463 1464 // 3 character syllables, っ doubles the consonant after 1465 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1466 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1467 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1468 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1469 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1470 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1471 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1472 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1473 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1474 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1475 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1476 1477 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1478 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1479 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1480 1481 // 2 character syllables - normal 1482 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1483 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1484 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1485 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1486 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1487 
'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1488 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1489 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1490 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1491 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1492 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1493 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1494 'うぇ'=>'we','うぃ'=>'wi', 1495 'いぇ'=>'ye', 1496 1497 // 2 character syllables, っ doubles the consonant after 1498 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1499 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1500 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1501 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1502 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1503 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1504 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1505 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1506 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1507 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1508 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1509 1510 // 1 character syllabels 1511 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1512 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1513 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1514 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1515 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1516 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1517 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1518 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1519 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1520 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1521 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1522 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1523 
'わ'=>'wa','を'=>'wo', 1524 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1525 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1526 // old characters 1527 'ゑ'=>'we','ゐ'=>'wi', 1528 1529 // convert what's left (probably only kicks in when something's missing above) 1530 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1531 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1532 1533 // never seen one of those (disabled for the moment) 1534 // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1535 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1536 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1537 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1538 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1539 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1540 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1541 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1542 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1543 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1544 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1545 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1546 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1547 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1548 1549 // 'spare' characters from other romanization systems 1550 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1551 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1552 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1553 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1554 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1555 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1556 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1557 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1558 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1559 
//'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1560 1561 1562 // Japanese katakana 1563 1564 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1565 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1566 'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1567 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1568 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1569 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1570 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1571 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1572 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1573 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1574 'ッティー'=>'ttii', 1575 'ッヂィー'=>'ddii', 1576 1577 // 3 character syllables - doubled vowels 1578 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1579 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1580 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1581 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1582 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1583 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1584 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1585 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1586 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1587 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1588 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1589 
'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1590 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1591 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1592 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1593 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1594 'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1595 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1596 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1597 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1598 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1599 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1600 'ウェー'=>'wee','ウィー'=>'wii', 1601 'イェー'=>'yee', 1602 'ティー'=>'tii', 1603 'ヂィー'=>'dii', 1604 1605 // 3 character syllables - doubled consonants 1606 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1607 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1608 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1609 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1610 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1611 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1612 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1613 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1614 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1615 'ッティ'=>'tti', 1616 'ッヂィ'=>'ddi', 1617 1618 // 3 character syllables - doubled vowel and consonants 1619 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1620 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1621 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1622 
'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1623 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1624 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1625 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1626 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1627 'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1628 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1629 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1630 1631 // 2 character syllables - normal 1632 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1633 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1634 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1635 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1636 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1637 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1638 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1639 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1640 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1641 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1642 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1643 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1644 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1645 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1646 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1647 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1648 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1649 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1650 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1651 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1652 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1653 
'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1654 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1655 'ウェ'=>'we','ウィ'=>'wi', 1656 'イェ'=>'ye', 1657 'ティ'=>'ti', 1658 'ヂィ'=>'di', 1659 1660 // 2 character syllables - doubled vocal 1661 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1662 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 1663 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1664 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1665 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1666 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1667 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1668 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1669 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1670 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1671 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1672 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1673 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1674 'ワー'=>'waa','ヲー'=>'woo', 1675 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1676 'ヵー'=>'kaa','ヶー'=>'kee', 1677 // old characters 1678 'ヱー'=>'wee','ヰー'=>'wii', 1679 1680 // seperate katakana 'n' 1681 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1682 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1683 1684 // 2 character syllables - doubled consonants 1685 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1686 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1687 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1688 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1689 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1690 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1691 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1692 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1693 
'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1694 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1695 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1696 1697 // 1 character syllables 1698 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1699 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1700 'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1701 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1702 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1703 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1704 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1705 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1706 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1707 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1708 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1709 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1710 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1711 'ワ'=>'wa','ヲ'=>'wo', 1712 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1713 'ヵ'=>'ka','ヶ'=>'ke', 1714 // old characters 1715 'ヱ'=>'we','ヰ'=>'wi', 1716 1717 // convert what's left (probably only kicks in when something's missing above) 1718 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1719 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1720 1721 // special characters 1722 '・'=>'_','、'=>'_', 1723 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1724 1725 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1726 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1727 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1728 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1729 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1730 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1731 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1732 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1733 // 
'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1734 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1735 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1736 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1737 1738 // "Greeklish" 1739 'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1740 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1741 1742 // Thai 1743 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1744 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1745 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1746 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1747 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1748 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1749 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1750 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1751 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1752 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1753 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1754 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1755 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1756 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1757 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1758 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1759 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1760 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1761 1762 // Korean 1763 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1764 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1765 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1766 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1767 
'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1768 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1769); 1770 1771 1772