<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * check for mb_string support
 *
 * UTF8_MBSTRING is 1 when mb_substr() exists and UTF8_NOMBSTRING has not
 * been defined by the application, 0 otherwise.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

/**
 * Check if PREG was compiled with UTF-8 support
 *
 * Without this many of the functions below will not work, so this is a minimal requirement
 */
if(!defined('UTF8_PREGSUPPORT')){
    define('UTF8_PREGSUPPORT', (bool) @preg_match('/^.$/u', 'ñ'));
}

/**
 * Check if PREG was compiled with Unicode Property support
 *
 * This is not required for the functions below, but might be needed in a UTF-8 aware application
 */
if(!defined('UTF8_PROPERTYSUPPORT')){
    define('UTF8_PROPERTYSUPPORT', (bool) @preg_match('/^\pL$/u', 'ñ'));
}


if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
     *
     * @param string $str
     * @return bool true when no byte above 0x7F occurs in $str
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string: every byte with a value of 128 or above
     * is dropped, all other bytes are kept in their original order.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $str
     * @return string
     */
    function utf8_strip($str){
        $ascii = '';
        $len = strlen($str);
        for($i=0; $i<$len; $i++){
            // square brackets: the curly brace offset syntax was removed in PHP 8
            if(ord($str[$i]) < 128){
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * @author <bmorel@ssi.fr>
     * @link   http://www.php.net/manual/en/function.utf8-encode.php
     *
     * @param string $Str
     * @return bool
*/ 88 function utf8_check($Str) { 89 $len = strlen($Str); 90 for ($i=0; $i<$len; $i++) { 91 $b = ord($Str[$i]); 92 if ($b < 0x80) continue; # 0bbbbbbb 93 elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb 94 elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb 95 elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb 96 elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb 97 elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b 98 else return false; # Does not match any model 99 100 for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ? 101 if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80)) 102 return false; 103 } 104 } 105 return true; 106 } 107} 108 109if(!function_exists('utf8_basename')){ 110 /** 111 * A locale independent basename() implementation 112 * 113 * works around a bug in PHP's basename() implementation 114 * 115 * @see basename() 116 * @link https://bugs.php.net/bug.php?id=37738 117 * 118 * @param string $path A path 119 * @param string $suffix If the name component ends in suffix this will also be cut off 120 * @return string 121 */ 122 function utf8_basename($path, $suffix=''){ 123 $path = trim($path,'\\/'); 124 $rpos = max(strrpos($path, '/'), strrpos($path, '\\')); 125 if($rpos) $path = substr($path, $rpos+1); 126 127 $suflen = strlen($suffix); 128 if($suflen && (substr($path, -$suflen) == $suffix)){ 129 $path = substr($path, 0, -$suflen); 130 } 131 132 return $path; 133 } 134} 135 136if(!function_exists('utf8_strlen')){ 137 /** 138 * Unicode aware replacement for strlen() 139 * 140 * utf8_decode() converts characters that are not in ISO-8859-1 141 * to '?', which, for the purpose of counting, is alright - It's 142 * even faster than mb_strlen. 
143 * 144 * @author <chernyshevsky at hotmail dot com> 145 * @see strlen() 146 * @see utf8_decode() 147 * 148 * @param string $string 149 * @return int 150 */ 151 function utf8_strlen($string){ 152 return strlen(utf8_decode($string)); 153 } 154} 155 156if(!function_exists('utf8_substr')){ 157 /** 158 * UTF-8 aware alternative to substr 159 * 160 * Return part of a string given character offset (and optionally length) 161 * 162 * @author Harry Fuecks <hfuecks@gmail.com> 163 * @author Chris Smith <chris@jalakai.co.uk> 164 * 165 * @param string $str 166 * @param int $offset number of UTF-8 characters offset (from left) 167 * @param int $length (optional) length in UTF-8 characters from offset 168 * @return string 169 */ 170 function utf8_substr($str, $offset, $length = null) { 171 if(UTF8_MBSTRING){ 172 if( $length === null ){ 173 return mb_substr($str, $offset); 174 }else{ 175 return mb_substr($str, $offset, $length); 176 } 177 } 178 179 /* 180 * Notes: 181 * 182 * no mb string support, so we'll use pcre regex's with 'u' flag 183 * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for 184 * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536) 185 * 186 * substr documentation states false can be returned in some cases (e.g. offset > string length) 187 * mb_substr never returns false, it will return an empty string instead. 188 * 189 * calculating the number of characters in the string is a relatively expensive operation, so 190 * we only carry it out when necessary. 
It isn't necessary for +ve offsets and no specified length 191 */ 192 193 // cast parameters to appropriate types to avoid multiple notices/warnings 194 $str = (string)$str; // generates E_NOTICE for PHP4 objects, but not PHP5 objects 195 $offset = (int)$offset; 196 if (!is_null($length)) $length = (int)$length; 197 198 // handle trivial cases 199 if ($length === 0) return ''; 200 if ($offset < 0 && $length < 0 && $length < $offset) return ''; 201 202 $offset_pattern = ''; 203 $length_pattern = ''; 204 205 // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!) 206 if ($offset < 0) { 207 $strlen = strlen(utf8_decode($str)); // see notes 208 $offset = $strlen + $offset; 209 if ($offset < 0) $offset = 0; 210 } 211 212 // establish a pattern for offset, a non-captured group equal in length to offset 213 if ($offset > 0) { 214 $Ox = (int)($offset/65535); 215 $Oy = $offset%65535; 216 217 if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}'; 218 $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})'; 219 } else { 220 $offset_pattern = '^'; // offset == 0; just anchor the pattern 221 } 222 223 // establish a pattern for length 224 if (is_null($length)) { 225 $length_pattern = '(.*)$'; // the rest of the string 226 } else { 227 228 if (!isset($strlen)) $strlen = strlen(utf8_decode($str)); // see notes 229 if ($offset > $strlen) return ''; // another trivial case 230 231 if ($length > 0) { 232 233 $length = min($strlen-$offset, $length); // reduce any length that would go passed the end of the string 234 235 $Lx = (int)($length/65535); 236 $Ly = $length%65535; 237 238 // +ve length requires ... a captured group of length characters 239 if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}'; 240 $length_pattern = '('.$length_pattern.'.{'.$Ly.'})'; 241 242 } else if ($length < 0) { 243 244 if ($length < ($offset - $strlen)) return ''; 245 246 $Lx = (int)((-$length)/65535); 247 $Ly = (-$length)%65535; 248 249 // -ve length requires ... 
capture everything except a group of -length characters 250 // anchored at the tail-end of the string 251 if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}'; 252 $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$'; 253 } 254 } 255 256 if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return ''; 257 return $match[1]; 258 } 259} 260 261if(!function_exists('utf8_substr_replace')){ 262 /** 263 * Unicode aware replacement for substr_replace() 264 * 265 * @author Andreas Gohr <andi@splitbrain.org> 266 * @see substr_replace() 267 * 268 * @param string $string input string 269 * @param string $replacement the replacement 270 * @param int $start the replacing will begin at the start'th offset into string. 271 * @param int $length If given and is positive, it represents the length of the portion of string which is 272 * to be replaced. If length is zero then this function will have the effect of inserting 273 * replacement into string at the given start offset. 274 * @return string 275 */ 276 function utf8_substr_replace($string, $replacement, $start , $length=0 ){ 277 $ret = ''; 278 if($start>0) $ret .= utf8_substr($string, 0, $start); 279 $ret .= $replacement; 280 $ret .= utf8_substr($string, $start+$length); 281 return $ret; 282 } 283} 284 285if(!function_exists('utf8_ltrim')){ 286 /** 287 * Unicode aware replacement for ltrim() 288 * 289 * @author Andreas Gohr <andi@splitbrain.org> 290 * @see ltrim() 291 * 292 * @param string $str 293 * @param string $charlist 294 * @return string 295 */ 296 function utf8_ltrim($str,$charlist=''){ 297 if($charlist == '') return ltrim($str); 298 299 //quote charlist for use in a characterclass 300 $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist); 301 302 return preg_replace('/^['.$charlist.']+/u','',$str); 303 } 304} 305 306if(!function_exists('utf8_rtrim')){ 307 /** 308 * Unicode aware replacement for rtrim() 309 * 310 * @author Andreas Gohr <andi@splitbrain.org> 311 * @see rtrim() 312 * 
313 * @param string $str 314 * @param string $charlist 315 * @return string 316 */ 317 function utf8_rtrim($str,$charlist=''){ 318 if($charlist == '') return rtrim($str); 319 320 //quote charlist for use in a characterclass 321 $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist); 322 323 return preg_replace('/['.$charlist.']+$/u','',$str); 324 } 325} 326 327if(!function_exists('utf8_trim')){ 328 /** 329 * Unicode aware replacement for trim() 330 * 331 * @author Andreas Gohr <andi@splitbrain.org> 332 * @see trim() 333 * 334 * @param string $str 335 * @param string $charlist 336 * @return string 337 */ 338 function utf8_trim($str,$charlist='') { 339 if($charlist == '') return trim($str); 340 341 return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist); 342 } 343} 344 345if(!function_exists('utf8_strtolower')){ 346 /** 347 * This is a unicode aware replacement for strtolower() 348 * 349 * Uses mb_string extension if available 350 * 351 * @author Leo Feyer <leo@typolight.org> 352 * @see strtolower() 353 * @see utf8_strtoupper() 354 * 355 * @param string $string 356 * @return string 357 */ 358 function utf8_strtolower($string){ 359 if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8'); 360 361 global $UTF8_UPPER_TO_LOWER; 362 return strtr($string,$UTF8_UPPER_TO_LOWER); 363 } 364} 365 366if(!function_exists('utf8_strtoupper')){ 367 /** 368 * This is a unicode aware replacement for strtoupper() 369 * 370 * Uses mb_string extension if available 371 * 372 * @author Leo Feyer <leo@typolight.org> 373 * @see strtoupper() 374 * @see utf8_strtoupper() 375 * 376 * @param string $string 377 * @return string 378 */ 379 function utf8_strtoupper($string){ 380 if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8'); 381 382 global $UTF8_LOWER_TO_UPPER; 383 return strtr($string,$UTF8_LOWER_TO_UPPER); 384 } 385} 386 387if(!function_exists('utf8_ucfirst')){ 388 /** 389 * UTF-8 aware alternative to ucfirst 390 * Make a string's first character uppercase 391 * 392 * 
@author Harry Fuecks 393 * 394 * @param string $str 395 * @return string with first character as upper case (if applicable) 396 */ 397 function utf8_ucfirst($str){ 398 switch ( utf8_strlen($str) ) { 399 case 0: 400 return ''; 401 case 1: 402 return utf8_strtoupper($str); 403 default: 404 preg_match('/^(.{1})(.*)$/us', $str, $matches); 405 return utf8_strtoupper($matches[1]).$matches[2]; 406 } 407 } 408} 409 410if(!function_exists('utf8_ucwords')){ 411 /** 412 * UTF-8 aware alternative to ucwords 413 * Uppercase the first character of each word in a string 414 * 415 * @author Harry Fuecks 416 * @see http://www.php.net/ucwords 417 * 418 * @param string $str 419 * @return string with first char of each word uppercase 420 */ 421 function utf8_ucwords($str) { 422 // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches; 423 // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns 424 // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords 425 $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u'; 426 427 return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str); 428 } 429 430 /** 431 * Callback function for preg_replace_callback call in utf8_ucwords 432 * You don't need to call this yourself 433 * 434 * @author Harry Fuecks 435 * @see utf8_ucwords 436 * @see utf8_strtoupper 437 * 438 * @param array $matches matches corresponding to a single word 439 * @return string with first char of the word in uppercase 440 */ 441 function utf8_ucwords_callback($matches) { 442 $leadingws = $matches[2]; 443 $ucfirst = utf8_strtoupper($matches[3]); 444 $ucword = utf8_substr_replace(ltrim($matches[0]),$ucfirst,0,1); 445 return $leadingws . 
$ucword; 446 } 447} 448 449if(!function_exists('utf8_deaccent')){ 450 /** 451 * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents 452 * 453 * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1) 454 * letters. Default is to deaccent both cases ($case = 0) 455 * 456 * @author Andreas Gohr <andi@splitbrain.org> 457 * 458 * @param string $string 459 * @param int $case 460 * @return string 461 */ 462 function utf8_deaccent($string,$case=0){ 463 if($case <= 0){ 464 global $UTF8_LOWER_ACCENTS; 465 $string = strtr($string,$UTF8_LOWER_ACCENTS); 466 } 467 if($case >= 0){ 468 global $UTF8_UPPER_ACCENTS; 469 $string = strtr($string,$UTF8_UPPER_ACCENTS); 470 } 471 return $string; 472 } 473} 474 475if(!function_exists('utf8_romanize')){ 476 /** 477 * Romanize a non-latin string 478 * 479 * @author Andreas Gohr <andi@splitbrain.org> 480 * 481 * @param string $string 482 * @return string 483 */ 484 function utf8_romanize($string){ 485 if(utf8_isASCII($string)) return $string; //nothing to do 486 487 global $UTF8_ROMANIZATION; 488 return strtr($string,$UTF8_ROMANIZATION); 489 } 490} 491 492if(!function_exists('utf8_stripspecials')){ 493 /** 494 * Removes special characters (nonalphanumeric) from a UTF-8 string 495 * 496 * This function adds the controlchars 0x00 to 0x19 to the array of 497 * stripped chars (they are not included in $UTF8_SPECIAL_CHARS) 498 * 499 * @author Andreas Gohr <andi@splitbrain.org> 500 * 501 * @param string $string The UTF8 string to strip of special chars 502 * @param string $repl Replace special with this string 503 * @param string $additional Additional chars to strip (used in regexp char class) 504 * @return string 505 */ 506 function utf8_stripspecials($string,$repl='',$additional=''){ 507 global $UTF8_SPECIAL_CHARS2; 508 509 static $specials = null; 510 if(is_null($specials)){ 511 #$specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/'); 512 $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/'); 
513 } 514 515 return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string); 516 } 517} 518 519if(!function_exists('utf8_strpos')){ 520 /** 521 * This is an Unicode aware replacement for strpos 522 * 523 * @author Leo Feyer <leo@typolight.org> 524 * @see strpos() 525 * 526 * @param string $haystack 527 * @param string $needle 528 * @param integer $offset 529 * @return integer 530 */ 531 function utf8_strpos($haystack, $needle, $offset=0){ 532 $comp = 0; 533 $length = null; 534 535 while (is_null($length) || $length < $offset) { 536 $pos = strpos($haystack, $needle, $offset + $comp); 537 538 if ($pos === false) 539 return false; 540 541 $length = utf8_strlen(substr($haystack, 0, $pos)); 542 543 if ($length < $offset) 544 $comp = $pos - $length; 545 } 546 547 return $length; 548 } 549} 550 551if(!function_exists('utf8_tohtml')){ 552 /** 553 * Encodes UTF-8 characters to HTML entities 554 * 555 * @author Tom N Harris <tnharris@whoopdedo.org> 556 * @author <vpribish at shopping dot com> 557 * @link http://www.php.net/manual/en/function.utf8-decode.php 558 * 559 * @param string $str 560 * @return string 561 */ 562 function utf8_tohtml ($str) { 563 $ret = ''; 564 foreach (utf8_to_unicode($str) as $cp) { 565 if ($cp < 0x80) 566 $ret .= chr($cp); 567 elseif ($cp < 0x100) 568 $ret .= "&#$cp;"; 569 else 570 $ret .= '&#x'.dechex($cp).';'; 571 } 572 return $ret; 573 } 574} 575 576if(!function_exists('utf8_unhtml')){ 577 /** 578 * Decodes HTML entities to UTF-8 characters 579 * 580 * Convert any &#..; entity to a codepoint, 581 * The entities flag defaults to only decoding numeric entities. 582 * Pass HTML_ENTITIES and named entities, including & < etc. 583 * are handled as well. 
Avoids the problem that would occur if you 584 * had to decode "&#38;&amp;#38;" 585 * 586 * unhtmlspecialchars(utf8_unhtml($s)) -> "&&" 587 * utf8_unhtml(unhtmlspecialchars($s)) -> "&&#38;" 588 * what it should be -> "&&#38;" 589 * 590 * @author Tom N Harris <tnharris@whoopdedo.org> 591 * 592 * @param string $str UTF-8 encoded string 593 * @param boolean $entities Flag controlling decoding of named entities. 594 * @return string UTF-8 encoded string with numeric (and named) entities replaced. 595 */ 596 function utf8_unhtml($str, $entities=null) { 597 static $decoder = null; 598 if (is_null($decoder)) 599 $decoder = new utf8_entity_decoder(); 600 if (is_null($entities)) 601 return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m', 602 'utf8_decode_numeric', $str); 603 else 604 return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m', 605 array(&$decoder, 'decode'), $str); 606 } 607} 608 609if(!function_exists('utf8_decode_numeric')){ 610 /** 611 * Decodes numeric HTML entities to their correct UTF-8 characters 612 * 613 * @param $ent string A numeric entity 614 * @return string|false 615 */ 616 function utf8_decode_numeric($ent) { 617 switch ($ent[2]) { 618 case 'X': 619 case 'x': 620 $cp = hexdec($ent[3]); 621 break; 622 default: 623 $cp = intval($ent[3]); 624 break; 625 } 626 return unicode_to_utf8(array($cp)); 627 } 628} 629 630if(!class_exists('utf8_entity_decoder')){ 631 /** 632 * Encapsulate HTML entity decoding tables 633 */ 634 class utf8_entity_decoder { 635 var $table; 636 637 /** 638 * Initializes the decoding tables 639 */ 640 function __construct() { 641 $table = get_html_translation_table(HTML_ENTITIES); 642 $table = array_flip($table); 643 $this->table = array_map(array(&$this,'makeutf8'), $table); 644 } 645 646 /** 647 * Wrapper around unicode_to_utf8() 648 * 649 * @param string $c 650 * @return string|false 651 */ 652 function makeutf8($c) { 653 return unicode_to_utf8(array(ord($c))); 654 } 655 656 /** 657 * Decodes any HTML entity to it's 
correct UTF-8 char equivalent 658 * 659 * @param string $ent An entity 660 * @return string|false 661 */ 662 function decode($ent) { 663 if ($ent[1] == '#') { 664 return utf8_decode_numeric($ent); 665 } elseif (array_key_exists($ent[0],$this->table)) { 666 return $this->table[$ent[0]]; 667 } else { 668 return $ent[0]; 669 } 670 } 671 } 672} 673 674if(!function_exists('utf8_to_unicode')){ 675 /** 676 * Takes an UTF-8 string and returns an array of ints representing the 677 * Unicode characters. Astral planes are supported ie. the ints in the 678 * output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates 679 * are not allowed. 680 * 681 * If $strict is set to true the function returns false if the input 682 * string isn't a valid UTF-8 octet sequence and raises a PHP error at 683 * level E_USER_WARNING 684 * 685 * Note: this function has been modified slightly in this library to 686 * trigger errors on encountering bad bytes 687 * 688 * @author <hsivonen@iki.fi> 689 * @author Harry Fuecks <hfuecks@gmail.com> 690 * @see unicode_to_utf8 691 * @link http://hsivonen.iki.fi/php-utf8/ 692 * @link http://sourceforge.net/projects/phputf8/ 693 * 694 * @param string $str UTF-8 encoded string 695 * @param boolean $strict Check for invalid sequences? 696 * @return mixed array of unicode code points or false if UTF-8 invalid 697 */ 698 function utf8_to_unicode($str,$strict=false) { 699 $mState = 0; // cached expected number of octets after the current octet 700 // until the beginning of the next UTF8 character sequence 701 $mUcs4 = 0; // cached Unicode character 702 $mBytes = 1; // cached expected number of octets in the current sequence 703 704 $out = array(); 705 706 $len = strlen($str); 707 708 for($i = 0; $i < $len; $i++) { 709 710 $in = ord($str{$i}); 711 712 if ( $mState == 0) { 713 714 // When mState is zero we expect either a US-ASCII character or a 715 // multi-octet sequence. 716 if (0 == (0x80 & ($in))) { 717 // US-ASCII, pass straight through. 
718 $out[] = $in; 719 $mBytes = 1; 720 721 } else if (0xC0 == (0xE0 & ($in))) { 722 // First octet of 2 octet sequence 723 $mUcs4 = ($in); 724 $mUcs4 = ($mUcs4 & 0x1F) << 6; 725 $mState = 1; 726 $mBytes = 2; 727 728 } else if (0xE0 == (0xF0 & ($in))) { 729 // First octet of 3 octet sequence 730 $mUcs4 = ($in); 731 $mUcs4 = ($mUcs4 & 0x0F) << 12; 732 $mState = 2; 733 $mBytes = 3; 734 735 } else if (0xF0 == (0xF8 & ($in))) { 736 // First octet of 4 octet sequence 737 $mUcs4 = ($in); 738 $mUcs4 = ($mUcs4 & 0x07) << 18; 739 $mState = 3; 740 $mBytes = 4; 741 742 } else if (0xF8 == (0xFC & ($in))) { 743 /* First octet of 5 octet sequence. 744 * 745 * This is illegal because the encoded codepoint must be either 746 * (a) not the shortest form or 747 * (b) outside the Unicode range of 0-0x10FFFF. 748 * Rather than trying to resynchronize, we will carry on until the end 749 * of the sequence and let the later error handling code catch it. 750 */ 751 $mUcs4 = ($in); 752 $mUcs4 = ($mUcs4 & 0x03) << 24; 753 $mState = 4; 754 $mBytes = 5; 755 756 } else if (0xFC == (0xFE & ($in))) { 757 // First octet of 6 octet sequence, see comments for 5 octet sequence. 758 $mUcs4 = ($in); 759 $mUcs4 = ($mUcs4 & 1) << 30; 760 $mState = 5; 761 $mBytes = 6; 762 763 } elseif($strict) { 764 /* Current octet is neither in the US-ASCII range nor a legal first 765 * octet of a multi-octet sequence. 766 */ 767 trigger_error( 768 'utf8_to_unicode: Illegal sequence identifier '. 769 'in UTF-8 at byte '.$i, 770 E_USER_WARNING 771 ); 772 return false; 773 774 } 775 776 } else { 777 778 // When mState is non-zero, we expect a continuation of the multi-octet 779 // sequence 780 if (0x80 == (0xC0 & ($in))) { 781 782 // Legal continuation. 783 $shift = ($mState - 1) * 6; 784 $tmp = $in; 785 $tmp = ($tmp & 0x0000003F) << $shift; 786 $mUcs4 |= $tmp; 787 788 /** 789 * End of the multi-octet sequence. 
mUcs4 now contains the final 790 * Unicode codepoint to be output 791 */ 792 if (0 == --$mState) { 793 794 /* 795 * Check for illegal sequences and codepoints. 796 */ 797 // From Unicode 3.1, non-shortest form is illegal 798 if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || 799 ((3 == $mBytes) && ($mUcs4 < 0x0800)) || 800 ((4 == $mBytes) && ($mUcs4 < 0x10000)) || 801 (4 < $mBytes) || 802 // From Unicode 3.2, surrogate characters are illegal 803 (($mUcs4 & 0xFFFFF800) == 0xD800) || 804 // Codepoints outside the Unicode range are illegal 805 ($mUcs4 > 0x10FFFF)) { 806 807 if($strict){ 808 trigger_error( 809 'utf8_to_unicode: Illegal sequence or codepoint '. 810 'in UTF-8 at byte '.$i, 811 E_USER_WARNING 812 ); 813 814 return false; 815 } 816 817 } 818 819 if (0xFEFF != $mUcs4) { 820 // BOM is legal but we don't want to output it 821 $out[] = $mUcs4; 822 } 823 824 //initialize UTF8 cache 825 $mState = 0; 826 $mUcs4 = 0; 827 $mBytes = 1; 828 } 829 830 } elseif($strict) { 831 /** 832 *((0xC0 & (*in) != 0x80) && (mState != 0)) 833 * Incomplete multi-octet sequence. 834 */ 835 trigger_error( 836 'utf8_to_unicode: Incomplete multi-octet '. 837 ' sequence in UTF-8 at byte '.$i, 838 E_USER_WARNING 839 ); 840 841 return false; 842 } 843 } 844 } 845 return $out; 846 } 847} 848 849if(!function_exists('unicode_to_utf8')){ 850 /** 851 * Takes an array of ints representing the Unicode characters and returns 852 * a UTF-8 string. Astral planes are supported ie. the ints in the 853 * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates 854 * are not allowed. 
855 * 856 * If $strict is set to true the function returns false if the input 857 * array contains ints that represent surrogates or are outside the 858 * Unicode range and raises a PHP error at level E_USER_WARNING 859 * 860 * Note: this function has been modified slightly in this library to use 861 * output buffering to concatenate the UTF-8 string (faster) as well as 862 * reference the array by it's keys 863 * 864 * @param array $arr of unicode code points representing a string 865 * @param boolean $strict Check for invalid sequences? 866 * @return string|false UTF-8 string or false if array contains invalid code points 867 * 868 * @author <hsivonen@iki.fi> 869 * @author Harry Fuecks <hfuecks@gmail.com> 870 * @see utf8_to_unicode 871 * @link http://hsivonen.iki.fi/php-utf8/ 872 * @link http://sourceforge.net/projects/phputf8/ 873 */ 874 function unicode_to_utf8($arr,$strict=false) { 875 if (!is_array($arr)) return ''; 876 ob_start(); 877 878 foreach (array_keys($arr) as $k) { 879 880 if ( ($arr[$k] >= 0) && ($arr[$k] <= 0x007f) ) { 881 # ASCII range (including control chars) 882 883 echo chr($arr[$k]); 884 885 } else if ($arr[$k] <= 0x07ff) { 886 # 2 byte sequence 887 888 echo chr(0xc0 | ($arr[$k] >> 6)); 889 echo chr(0x80 | ($arr[$k] & 0x003f)); 890 891 } else if($arr[$k] == 0xFEFF) { 892 # Byte order mark (skip) 893 894 // nop -- zap the BOM 895 896 } else if ($arr[$k] >= 0xD800 && $arr[$k] <= 0xDFFF) { 897 # Test for illegal surrogates 898 899 // found a surrogate 900 if($strict){ 901 trigger_error( 902 'unicode_to_utf8: Illegal surrogate '. 
903 'at index: '.$k.', value: '.$arr[$k], 904 E_USER_WARNING 905 ); 906 return false; 907 } 908 909 } else if ($arr[$k] <= 0xffff) { 910 # 3 byte sequence 911 912 echo chr(0xe0 | ($arr[$k] >> 12)); 913 echo chr(0x80 | (($arr[$k] >> 6) & 0x003f)); 914 echo chr(0x80 | ($arr[$k] & 0x003f)); 915 916 } else if ($arr[$k] <= 0x10ffff) { 917 # 4 byte sequence 918 919 echo chr(0xf0 | ($arr[$k] >> 18)); 920 echo chr(0x80 | (($arr[$k] >> 12) & 0x3f)); 921 echo chr(0x80 | (($arr[$k] >> 6) & 0x3f)); 922 echo chr(0x80 | ($arr[$k] & 0x3f)); 923 924 } elseif($strict) { 925 926 trigger_error( 927 'unicode_to_utf8: Codepoint out of Unicode range '. 928 'at index: '.$k.', value: '.$arr[$k], 929 E_USER_WARNING 930 ); 931 932 // out of range 933 return false; 934 } 935 } 936 937 $result = ob_get_contents(); 938 ob_end_clean(); 939 return $result; 940 } 941} 942 943if(!function_exists('utf8_to_utf16be')){ 944 /** 945 * UTF-8 to UTF-16BE conversion. 946 * 947 * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits 948 * 949 * @param string $str 950 * @param bool $bom 951 * @return string 952 */ 953 function utf8_to_utf16be(&$str, $bom = false) { 954 $out = $bom ? "\xFE\xFF" : ''; 955 if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8'); 956 957 $uni = utf8_to_unicode($str); 958 foreach($uni as $cp){ 959 $out .= pack('n',$cp); 960 } 961 return $out; 962 } 963} 964 965if(!function_exists('utf16be_to_utf8')){ 966 /** 967 * UTF-8 to UTF-16BE conversion. 
968 * 969 * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits 970 * 971 * @param string $str 972 * @return false|string 973 */ 974 function utf16be_to_utf8(&$str) { 975 $uni = unpack('n*',$str); 976 return unicode_to_utf8($uni); 977 } 978} 979 980if(!function_exists('utf8_bad_replace')){ 981 /** 982 * Replace bad bytes with an alternative character 983 * 984 * ASCII character is recommended for replacement char 985 * 986 * PCRE Pattern to locate bad bytes in a UTF-8 string 987 * Comes from W3 FAQ: Multilingual Forms 988 * Note: modified to include full ASCII range including control chars 989 * 990 * @author Harry Fuecks <hfuecks@gmail.com> 991 * @see http://www.w3.org/International/questions/qa-forms-utf-8 992 * 993 * @param string $str to search 994 * @param string $replace to replace bad bytes with (defaults to '?') - use ASCII 995 * @return string 996 */ 997 function utf8_bad_replace($str, $replace = '') { 998 $UTF8_BAD = 999 '([\x00-\x7F]'. # ASCII (including control chars) 1000 '|[\xC2-\xDF][\x80-\xBF]'. # non-overlong 2-byte 1001 '|\xE0[\xA0-\xBF][\x80-\xBF]'. # excluding overlongs 1002 '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # straight 3-byte 1003 '|\xED[\x80-\x9F][\x80-\xBF]'. # excluding surrogates 1004 '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. # planes 1-3 1005 '|[\xF1-\xF3][\x80-\xBF]{3}'. # planes 4-15 1006 '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. 
# plane 16 1007 '|(.{1}))'; # invalid byte 1008 ob_start(); 1009 while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches)) { 1010 if ( !isset($matches[2])) { 1011 echo $matches[0]; 1012 } else { 1013 echo $replace; 1014 } 1015 $str = substr($str,strlen($matches[0])); 1016 } 1017 $result = ob_get_contents(); 1018 ob_end_clean(); 1019 return $result; 1020 } 1021} 1022 1023if(!function_exists('utf8_correctIdx')){ 1024 /** 1025 * adjust a byte index into a utf8 string to a utf8 character boundary 1026 * 1027 * @param string $str utf8 character string 1028 * @param int $i byte index into $str 1029 * @param $next bool direction to search for boundary, 1030 * false = up (current character) 1031 * true = down (next character) 1032 * 1033 * @return int byte index into $str now pointing to a utf8 character boundary 1034 * 1035 * @author chris smith <chris@jalakai.co.uk> 1036 */ 1037 function utf8_correctIdx(&$str,$i,$next=false) { 1038 1039 if ($i <= 0) return 0; 1040 1041 $limit = strlen($str); 1042 if ($i>=$limit) return $limit; 1043 1044 if ($next) { 1045 while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++; 1046 } else { 1047 while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--; 1048 } 1049 1050 return $i; 1051 } 1052} 1053 1054// only needed if no mb_string available 1055if(!UTF8_MBSTRING){ 1056 /** 1057 * UTF-8 Case lookup table 1058 * 1059 * This lookuptable defines the upper case letters to their correspponding 1060 * lower case letter in UTF-8 1061 * 1062 * @author Andreas Gohr <andi@splitbrain.org> 1063 */ 1064 global $UTF8_LOWER_TO_UPPER; 1065 if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array( 1066 "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q", 1067 "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G", 1068 "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ", 1069 
"ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ", 1070 "ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 1071 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 1072 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 1073 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 1074 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 1075 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 1076 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 1077 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 1078 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 1079 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 1080 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 1081 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 1082 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 1083 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 1084 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 1085 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 1086 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 1087 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 1088 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 1089 
"ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 1090 "ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 1091 "ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 1092 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 1093 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 1094 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 1095 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 1096 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 1097 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1098 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1099 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1100 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1101 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1102 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1103 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1104 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1105 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1106 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1107 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1108 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1109 
"ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1110 "ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1111 "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1112 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1113 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1114 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1115 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1116 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1117 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1118 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1119 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1120 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1121 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1122 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1123 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1124 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1125 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1126 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1127 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1128 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1129 
"ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1130 "á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 1131 "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 1132 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 1133 ); 1134 1135 /** 1136 * UTF-8 Case lookup table 1137 * 1138 * This lookuptable defines the lower case letters to their corresponding 1139 * upper case letter in UTF-8 1140 * 1141 * @author Andreas Gohr <andi@splitbrain.org> 1142 */ 1143 global $UTF8_UPPER_TO_LOWER; 1144 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1145 "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", 1146 "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", 1147 "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", 1148 "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", 1149 "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", 1150 "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", 1151 "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", 1152 "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", 1153 "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", 1154 "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", 1155 "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", 1156 "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ", 1157 "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ", 1158 
"Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố", 1159 "Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ", 1160 "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ", 1161 "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ", 1162 "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ", 1163 "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ", 1164 "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ", 1165 "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ", 1166 "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ", 1167 "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ", 1168 "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ", 1169 "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս", 1170 "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ", 1171 "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ", 1172 "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ", 1173 "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ", 1174 "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ", 1175 "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ", 1176 "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ", 1177 "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ", 1178 
"Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ", 1179 "Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ", 1180 "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ", 1181 "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ", 1182 "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ", 1183 "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п", 1184 "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е", 1185 "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ", 1186 "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ", 1187 "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ", 1188 "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π", 1189 "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ", 1190 "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ", 1191 "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ", 1192 "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ", 1193 "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș", 1194 "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ", 1195 "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ", 1196 "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ", 1197 "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ", 1198 
"Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ", 1199 "Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž", 1200 "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ", 1201 "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ", 1202 "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ", 1203 "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī", 1204 "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė", 1205 "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă", 1206 "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö", 1207 "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì", 1208 "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â", 1209 "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1210 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1211 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1212 ); 1213}; // end of case lookup tables 1214 1215/** 1216 * UTF-8 lookup table for lower case accented letters 1217 * 1218 * This lookuptable defines replacements for accented characters from the ASCII-7 1219 * range. This are lower case letters only. 
/**
 * UTF-8 lookup table for lower case accented letters
 *
 * Maps each accented lower case character to its replacement, which consists
 * of one or more plain ASCII-7 letters. Only lower case letters are listed
 * here. The table is only filled when the global is still empty, so a table
 * set up beforehand is left untouched.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_LOWER_ACCENTS;
if (empty($UTF8_LOWER_ACCENTS)) {
    $UTF8_LOWER_ACCENTS = array(
        'à'=>'a',  'ô'=>'o',  'ď'=>'d',  'ḟ'=>'f',  'ë'=>'e',  'š'=>'s',  'ơ'=>'o',
        'ß'=>'ss', 'ă'=>'a',  'ř'=>'r',  'ț'=>'t',  'ň'=>'n',  'ā'=>'a',  'ķ'=>'k',
        'ŝ'=>'s',  'ỳ'=>'y',  'ņ'=>'n',  'ĺ'=>'l',  'ħ'=>'h',  'ṗ'=>'p',  'ó'=>'o',
        'ú'=>'u',  'ě'=>'e',  'é'=>'e',  'ç'=>'c',  'ẁ'=>'w',  'ċ'=>'c',  'õ'=>'o',
        'ṡ'=>'s',  'ø'=>'o',  'ģ'=>'g',  'ŧ'=>'t',  'ș'=>'s',  'ė'=>'e',  'ĉ'=>'c',
        'ś'=>'s',  'î'=>'i',  'ű'=>'u',  'ć'=>'c',  'ę'=>'e',  'ŵ'=>'w',  'ṫ'=>'t',
        'ū'=>'u',  'č'=>'c',  'ö'=>'oe', 'è'=>'e',  'ŷ'=>'y',  'ą'=>'a',  'ł'=>'l',
        'ų'=>'u',  'ů'=>'u',  'ş'=>'s',  'ğ'=>'g',  'ļ'=>'l',  'ƒ'=>'f',  'ž'=>'z',
        'ẃ'=>'w',  'ḃ'=>'b',  'å'=>'a',  'ì'=>'i',  'ï'=>'i',  'ḋ'=>'d',  'ť'=>'t',
        'ŗ'=>'r',  'ä'=>'ae', 'í'=>'i',  'ŕ'=>'r',  'ê'=>'e',  'ü'=>'ue', 'ò'=>'o',
        'ē'=>'e',  'ñ'=>'n',  'ń'=>'n',  'ĥ'=>'h',  'ĝ'=>'g',  'đ'=>'d',  'ĵ'=>'j',
        'ÿ'=>'y',  'ũ'=>'u',  'ŭ'=>'u',  'ư'=>'u',  'ţ'=>'t',  'ý'=>'y',  'ő'=>'o',
        'â'=>'a',  'ľ'=>'l',  'ẅ'=>'w',  'ż'=>'z',  'ī'=>'i',  'ã'=>'a',  'ġ'=>'g',
        'ṁ'=>'m',  'ō'=>'o',  'ĩ'=>'i',  'ù'=>'u',  'į'=>'i',  'ź'=>'z',  'á'=>'a',
        'û'=>'u',  'þ'=>'th', 'ð'=>'dh', 'æ'=>'ae', 'µ'=>'u',  'ĕ'=>'e',
    );
}
/**
 * UTF-8 lookup table for upper case accented letters
 *
 * Maps each accented upper case character to its replacement, which consists
 * of one or more plain ASCII-7 letters (multi letter replacements are
 * capitalised, e.g. 'Ae'). Only upper case letters are listed here. The table
 * is only filled when the global is still empty, so a table set up beforehand
 * is left untouched.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_UPPER_ACCENTS;
if (empty($UTF8_UPPER_ACCENTS)) {
    $UTF8_UPPER_ACCENTS = array(
        'À'=>'A',  'Ô'=>'O',  'Ď'=>'D',  'Ḟ'=>'F',  'Ë'=>'E',  'Š'=>'S',  'Ơ'=>'O',
        'Ă'=>'A',  'Ř'=>'R',  'Ț'=>'T',  'Ň'=>'N',  'Ā'=>'A',  'Ķ'=>'K',
        'Ŝ'=>'S',  'Ỳ'=>'Y',  'Ņ'=>'N',  'Ĺ'=>'L',  'Ħ'=>'H',  'Ṗ'=>'P',  'Ó'=>'O',
        'Ú'=>'U',  'Ě'=>'E',  'É'=>'E',  'Ç'=>'C',  'Ẁ'=>'W',  'Ċ'=>'C',  'Õ'=>'O',
        'Ṡ'=>'S',  'Ø'=>'O',  'Ģ'=>'G',  'Ŧ'=>'T',  'Ș'=>'S',  'Ė'=>'E',  'Ĉ'=>'C',
        'Ś'=>'S',  'Î'=>'I',  'Ű'=>'U',  'Ć'=>'C',  'Ę'=>'E',  'Ŵ'=>'W',  'Ṫ'=>'T',
        'Ū'=>'U',  'Č'=>'C',  'Ö'=>'Oe', 'È'=>'E',  'Ŷ'=>'Y',  'Ą'=>'A',  'Ł'=>'L',
        'Ų'=>'U',  'Ů'=>'U',  'Ş'=>'S',  'Ğ'=>'G',  'Ļ'=>'L',  'Ƒ'=>'F',  'Ž'=>'Z',
        'Ẃ'=>'W',  'Ḃ'=>'B',  'Å'=>'A',  'Ì'=>'I',  'Ï'=>'I',  'Ḋ'=>'D',  'Ť'=>'T',
        'Ŗ'=>'R',  'Ä'=>'Ae', 'Í'=>'I',  'Ŕ'=>'R',  'Ê'=>'E',  'Ü'=>'Ue', 'Ò'=>'O',
        'Ē'=>'E',  'Ñ'=>'N',  'Ń'=>'N',  'Ĥ'=>'H',  'Ĝ'=>'G',  'Đ'=>'D',  'Ĵ'=>'J',
        'Ÿ'=>'Y',  'Ũ'=>'U',  'Ŭ'=>'U',  'Ư'=>'U',  'Ţ'=>'T',  'Ý'=>'Y',  'Ő'=>'O',
        'Â'=>'A',  'Ľ'=>'L',  'Ẅ'=>'W',  'Ż'=>'Z',  'Ī'=>'I',  'Ã'=>'A',  'Ġ'=>'G',
        'Ṁ'=>'M',  'Ō'=>'O',  'Ĩ'=>'I',  'Ù'=>'U',  'Į'=>'I',  'Ź'=>'Z',  'Á'=>'A',
        'Û'=>'U',  'Þ'=>'Th', 'Ð'=>'Dh', 'Æ'=>'Ae', 'Ĕ'=>'E',
    );
}
/**
 * UTF-8 array of common special characters
 *
 * This array should contain the Unicode code points of all special characters
 * (not a letter or digit) defined in the various local charsets - it's not a
 * complete list of non-alphanum characters in UTF-8. It's not perfect but
 * should match most cases of special chars.
 *
 * The control chars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
 *
 * Every entry is a code point. The final row previously held the UTF-8 byte
 * sequences of some invisible characters packed into integers (0xc2a0,
 * 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf); most of those are not valid code
 * points at all, so they are now listed by their real code points instead.
 * A doubled 0xff09 entry was dropped as well.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_stripspecials()
 */
global $UTF8_SPECIAL_CHARS;
if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array(
    0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
    0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029,         0x002b, 0x002c,
            0x002f,         0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
    0x005c, 0x005d, 0x005e,         0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
    0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
    0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
    0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
    0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
    0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
    0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
    0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
    0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
    0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
    0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
    0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
    0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
    0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
    0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
    0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
    0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
    0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
    0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
    0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
    0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
    0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
    0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
    0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
    0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
    0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
    0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
    0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
    0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
    0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
    0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
    0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
    0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
    0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
    0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
    0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
    0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
    0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
    0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
    0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
    0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
    0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
    0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c,
    0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017,
    0x3018, 0x3019, 0x301a, 0x301b, 0x3036,
    0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
    0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
    0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
    0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
    0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
    0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09,
    0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c,
    0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b,
    0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65,
    0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea,
    0xffeb, 0xffec, 0xffed, 0xffee,
    0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703,
    0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b,
    0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713,
    0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b,
    // code points for the UTF-8 sequences E2 80 87, E2 80 AF, E2 81 A0 and
    // EF BB BF; the fifth one (C2 A0 = 0x00a0) is already listed further up
    0x2007, 0x202f, 0x2060, 0xfeff,
);
1365 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1366 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1367 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1368 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1369 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1370 '➷➸➹➺➻➼➽➾'. 1371 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1372 '�'. 1373 '�ﹼﹽ'. 1374 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1375 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1376 ''. 1377 ' '; 1378 1379/** 1380 * Romanization lookup table 1381 * 1382 * This lookup tables provides a way to transform strings written in a language 1383 * different from the ones based upon latin letters into plain ASCII. 1384 * 1385 * Please note: this is not a scientific transliteration table. It only works 1386 * oneway from nonlatin to ASCII and it works by simple character replacement 1387 * only. Specialities of each language are not supported. 1388 * 1389 * @author Andreas Gohr <andi@splitbrain.org> 1390 * @author Vitaly Blokhin <vitinfo@vitn.com> 1391 * @link http://www.uconv.com/translit.htm 1392 * @author Bisqwit <bisqwit@iki.fi> 1393 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1394 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1395 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1396 * @link http://www.btranslations.com/resources/romanization/korean.asp 1397 * @author Arthit Suriyawongkul <arthit@gmail.com> 1398 * @author Denis Scheither <amorphis@uni-bremen.de> 1399 * @author Eivind Morland <eivind.morland@gmail.com> 1400 */ 1401global $UTF8_ROMANIZATION; 1402if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1403 // scandinavian - differs from what we do in deaccent 1404 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1405 1406 //russian cyrillic 1407 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1408 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1409 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1410 
'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1411 'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1412 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1413 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1414 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1415 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1416 // Ukrainian cyrillic 1417 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1418 // Georgian 1419 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1420 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1421 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1422 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1423 'ჰ'=>'xh', 1424 //Sanskrit 1425 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1426 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1427 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1428 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1429 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1430 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1431 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1432 //Sanskrit diacritics 1433 'Ā'=>'A','Ī'=>'I','Ū'=>'U','Ṛ'=>'R','Ṝ'=>'R','Ṅ'=>'N','Ñ'=>'N','Ṭ'=>'T', 1434 'Ḍ'=>'D','Ṇ'=>'N','Ś'=>'S','Ṣ'=>'S','Ṁ'=>'M','Ṃ'=>'M','Ḥ'=>'H','Ḷ'=>'L','Ḹ'=>'L', 1435 'ā'=>'a','ī'=>'i','ū'=>'u','ṛ'=>'r','ṝ'=>'r','ṅ'=>'n','ñ'=>'n','ṭ'=>'t', 1436 'ḍ'=>'d','ṇ'=>'n','ś'=>'s','ṣ'=>'s','ṁ'=>'m','ṃ'=>'m','ḥ'=>'h','ḷ'=>'l','ḹ'=>'l', 1437 //Hebrew 1438 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1439 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1440 
'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1441 'ש'=>'sh','ת'=>'t', 1442 //Arabic 1443 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1444 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1445 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1446 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1447 1448 // Japanese characters (last update: 2008-05-09) 1449 1450 // Japanese hiragana 1451 1452 // 3 character syllables, っ doubles the consonant after 1453 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1454 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1455 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1456 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1457 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1458 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1459 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1460 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1461 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1462 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1463 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1464 1465 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1466 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1467 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1468 1469 // 2 character syllables - normal 1470 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1471 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1472 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1473 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1474 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1475 
'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1476 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1477 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1478 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1479 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1480 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1481 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1482 'うぇ'=>'we','うぃ'=>'wi', 1483 'いぇ'=>'ye', 1484 1485 // 2 character syllables, っ doubles the consonant after 1486 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1487 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1488 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1489 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1490 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1491 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1492 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1493 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1494 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1495 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1496 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1497 1498 // 1 character syllabels 1499 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1500 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1501 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1502 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1503 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1504 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1505 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1506 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1507 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1508 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1509 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1510 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1511 
'わ'=>'wa','を'=>'wo', 1512 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1513 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1514 // old characters 1515 'ゑ'=>'we','ゐ'=>'wi', 1516 1517 // convert what's left (probably only kicks in when something's missing above) 1518 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1519 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1520 1521 // never seen one of those (disabled for the moment) 1522 // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1523 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1524 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1525 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1526 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1527 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1528 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1529 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1530 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1531 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1532 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1533 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1534 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1535 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1536 1537 // 'spare' characters from other romanization systems 1538 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1539 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1540 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1541 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1542 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1543 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1544 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1545 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1546 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1547 
//'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1548 1549 1550 // Japanese katakana 1551 1552 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1553 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1554 'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1555 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1556 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1557 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1558 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1559 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1560 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1561 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1562 'ッティー'=>'ttii', 1563 'ッヂィー'=>'ddii', 1564 1565 // 3 character syllables - doubled vowels 1566 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1567 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1568 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1569 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1570 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1571 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1572 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1573 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1574 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1575 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1576 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1577 
'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1578 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1579 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1580 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1581 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1582 'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1583 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1584 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1585 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1586 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1587 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1588 'ウェー'=>'wee','ウィー'=>'wii', 1589 'イェー'=>'yee', 1590 'ティー'=>'tii', 1591 'ヂィー'=>'dii', 1592 1593 // 3 character syllables - doubled consonants 1594 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1595 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1596 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1597 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1598 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1599 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1600 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1601 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1602 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1603 'ッティ'=>'tti', 1604 'ッヂィ'=>'ddi', 1605 1606 // 3 character syllables - doubled vowel and consonants 1607 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1608 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1609 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1610 
'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1611 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1612 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1613 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1614 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1615 'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1616 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1617 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1618 1619 // 2 character syllables - normal 1620 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1621 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1622 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1623 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1624 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1625 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1626 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1627 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1628 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1629 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1630 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1631 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1632 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1633 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1634 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1635 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1636 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1637 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1638 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1639 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1640 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1641 
'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1642 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1643 'ウェ'=>'we','ウィ'=>'wi', 1644 'イェ'=>'ye', 1645 'ティ'=>'ti', 1646 'ヂィ'=>'di', 1647 1648 // 2 character syllables - doubled vocal 1649 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1650 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 1651 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1652 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1653 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1654 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1655 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1656 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1657 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1658 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1659 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1660 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1661 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1662 'ワー'=>'waa','ヲー'=>'woo', 1663 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1664 'ヵー'=>'kaa','ヶー'=>'kee', 1665 // old characters 1666 'ヱー'=>'wee','ヰー'=>'wii', 1667 1668 // seperate katakana 'n' 1669 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1670 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1671 1672 // 2 character syllables - doubled consonants 1673 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1674 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1675 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1676 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1677 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1678 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1679 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1680 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1681 
'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1682 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1683 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1684 1685 // 1 character syllables 1686 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1687 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1688 'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1689 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1690 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1691 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1692 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1693 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1694 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1695 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1696 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1697 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1698 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1699 'ワ'=>'wa','ヲ'=>'wo', 1700 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1701 'ヵ'=>'ka','ヶ'=>'ke', 1702 // old characters 1703 'ヱ'=>'we','ヰ'=>'wi', 1704 1705 // convert what's left (probably only kicks in when something's missing above) 1706 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1707 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1708 1709 // special characters 1710 '・'=>'_','、'=>'_', 1711 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1712 1713 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1714 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1715 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1716 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1717 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1718 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1719 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1720 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1721 // 
'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1722 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1723 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1724 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1725 1726 // "Greeklish" 1727 'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1728 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1729 1730 // Thai 1731 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1732 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1733 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1734 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1735 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1736 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1737 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1738 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1739 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1740 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1741 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1742 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1743 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1744 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1745 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1746 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1747 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1748 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1749 1750 // Korean 1751 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1752 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1753 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1754 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1755 
'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1756 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1757); 1758 1759 1760