<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * check for mb_string support
 *
 * UTF8_MBSTRING becomes 1 when mb_substr() exists and UTF8_NOMBSTRING has not
 * been defined, 0 otherwise. A value defined before this file is loaded wins.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
     * @param  string $str
     * @return bool   true when no byte outside 0x00-0x7F was found
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $str
     * @return string the input with every byte >= 0x80 removed
     */
    function utf8_strip($str){
        $ascii = '';
        $len = strlen($str);
        for($i=0; $i<$len; $i++){
            // square brackets: the curly brace offset syntax is gone in PHP 8
            if(ord($str[$i]) < 128){
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * Only the byte structure is checked (lead byte followed by the right
     * number of 10bbbbbb bytes). Lead bytes announcing 5 and 6 byte sequences
     * are accepted and overlong forms are not rejected.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://www.php.net/manual/en/function.utf8-encode.php
     * @param  string $Str
     * @return bool
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue;             // 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1;   // 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2;   // 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3;   // 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4;   // 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5;   // 1111110b
            else return false;                   // Does not match any model

            for ($j=0; $j<$n; $j++) {            // n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_basename')){
    /**
     * A locale independent basename() implementation
     *
     * works around a bug in PHP's basename() implementation
     *
     * Both '/' and '\' are treated as separators. Leading and trailing
     * separators are ignored, so 'dir/sub/' yields 'sub' and a path without
     * any separator is returned unchanged (apart from the suffix).
     *
     * @see    basename()
     * @link   https://bugs.php.net/bug.php?id=37738
     * @param  string $path   A path
     * @param  string $suffix If the name component ends in suffix this will also be cut off
     * @return string
     */
    function utf8_basename($path, $suffix=''){
        $path = trim($path, '\\/');

        // strrpos() returns false when there is no separator; after the trim
        // above a separator can never sit at index 0, so 0 safely means "none".
        // (Previously false+1 evaluated to 1 and the first character was lost.)
        $rpos = max((int) strrpos($path, '/'), (int) strrpos($path, '\\'));
        $file = ($rpos > 0) ? substr($path, $rpos+1) : $path;

        $suflen = strlen($suffix);
        if($suflen && (substr($file, -$suflen) === $suffix)){
            $file = substr($file, 0, -$suflen);
        }

        return $file;
    }
}

if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * utf8_decode() converts characters that are not in ISO-8859-1
     * to '?', which, for the purpose of counting, is alright - It's
     * even faster than mb_strlen.
*
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @see    utf8_decode()
     * @param  string $string
     * @return int    number of characters
     */
    function utf8_strlen($string){
        return strlen(utf8_decode($string));
    }
}

if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     * @param  string $str
     * @param  int    $offset number of UTF-8 characters offset (from left)
     * @param  int    $length (optional) length in UTF-8 characters from offset
     * @return mixed  string or false if failure
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            if( $length === null ){
                return mb_substr($str, $offset);
            }else{
                return mb_substr($str, $offset, $length);
            }
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
        $offset = (int)$offset;
        if (!is_null($length)) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = strlen(utf8_decode($str));        // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if (is_null($length)) {
            $length_pattern = '(.*)$';                  // the rest of the string
        } else {

            if (!isset($strlen)) $strlen = strlen(utf8_decode($str));    // see notes
            if ($offset > $strlen) return '';           // another trivial case

            if ($length > 0) {

                $length = min($strlen-$offset, $length);  // reduce any length that would go passed the end of the string

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {

                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}

if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * The result is the first $start characters (only when $start > 0),
     * followed by $replacement, followed by everything from character
     * $start+$length onwards.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     * @param  string $string
     * @param  string $replacement
     * @param  int    $start  character offset where the replacement begins
     * @param  int    $length number of characters to replace (0 inserts)
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        $ret = '';
        if($start>0) $ret .= utf8_substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= utf8_substr($string, $start+$length);
        return $ret;
    }
}

if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     * @param  string $str
     * @param  string $charlist characters to strip; empty means plain ltrim()
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if($charlist == '') return ltrim($str);

        //quote charlist for use in a characterclass
        //('^' is quoted as well, a leading one would negate the whole class)
        $charlist = preg_replace('!([\\\\\\-\\]\\[/\\^])!','\\\${1}',$charlist);

        return preg_replace('/^['.$charlist.']+/u','',$str);
    }
}

if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     * @param  string $str
     * @param  string $charlist characters to strip; empty means plain rtrim()
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if($charlist == '') return rtrim($str);

        //quote charlist for use in a characterclass
        //('^' is quoted as well, a leading one would negate the whole class)
        $charlist = preg_replace('!([\\\\\\-\\]\\[/\\^])!','\\\${1}',$charlist);

        return preg_replace('/['.$charlist.']+$/u','',$str);
    }
}

if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     *
* @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     * @param  string $str
     * @param  string $charlist characters to strip; empty means plain trim()
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if($charlist == '') return trim($str);

        return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
    }
}

if(!function_exists('utf8_strtolower')){
    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available, otherwise the
     * $UTF8_UPPER_TO_LOWER lookup table.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     * @param  string $string
     * @return string
     */
    function utf8_strtolower($string){
        if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8');

        global $UTF8_UPPER_TO_LOWER;
        return strtr($string,$UTF8_UPPER_TO_LOWER);
    }
}

if(!function_exists('utf8_strtoupper')){
    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available, otherwise the
     * $UTF8_LOWER_TO_UPPER lookup table.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtolower()
     * @param  string $string
     * @return string
     */
    function utf8_strtoupper($string){
        if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');

        global $UTF8_LOWER_TO_UPPER;
        return strtr($string,$UTF8_LOWER_TO_UPPER);
    }
}

if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        switch ( utf8_strlen($str) ) {
            case 0:
                return '';
            case 1:
                return utf8_strtoupper($str);
            default:
                // split into first character and the untouched remainder
                preg_match('/^(.{1})(.*)$/us', $str, $matches);
                return utf8_strtoupper($matches[1]).$matches[2];
        }
    }
}

if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first char of each word uppercase
     * @see    http://www.php.net/ucwords
     */
    function utf8_ucwords($str) {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
    }

    /**
     * Callback function for preg_replace_callback call in utf8_ucwords
     * You don't need to call this yourself
     *
     * @author Harry Fuecks
     * @param  array  $matches matches corresponding to a single word
     * @return string with first char of the word in uppercase
     * @see    utf8_ucwords
     * @see    utf8_strtoupper
     */
    function utf8_ucwords_callback($matches) {
        $leadingws = $matches[2];
        $ucfirst = utf8_strtoupper($matches[3]);

        // Cut off exactly the whitespace the pattern captured. ltrim() uses a
        // different character set (no form feed, but NUL), which could leave
        // the separator in place or eat the first character of the word.
        $word = substr($matches[0], strlen($leadingws));

        $ucword = utf8_substr_replace($word,$ucfirst,0,1);
        return $leadingws . $ucword;
    }
}

if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters.
* Default is to deaccent both cases ($case = 0)
     *
     * The replacements come from the global $UTF8_LOWER_ACCENTS and
     * $UTF8_UPPER_ACCENTS tables, which are defined outside this function.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @param  int    $case   <0 lower case only, >0 upper case only, 0 both
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        if($case <= 0){
            global $UTF8_LOWER_ACCENTS;
            $string = strtr($string,$UTF8_LOWER_ACCENTS);
        }
        if($case >= 0){
            global $UTF8_UPPER_ACCENTS;
            $string = strtr($string,$UTF8_UPPER_ACCENTS);
        }
        return $string;
    }
}

if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * Pure ASCII input is returned untouched; everything else is run through
     * the global $UTF8_ROMANIZATION table.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @return string
     */
    function utf8_romanize($string){
        if(utf8_isASCII($string)) return $string; //nothing to do

        global $UTF8_ROMANIZATION;
        return strtr($string,$UTF8_ROMANIZATION);
    }
}

if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the array of
     * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
     *
     * The quoted form of the global $UTF8_SPECIAL_CHARS2 is computed on the
     * first call and cached for all later calls.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string     The UTF8 string to strip of special chars
     * @param  string $repl       Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS2;

        static $specials = null;
        if(is_null($specials)){
            // former variant: preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/')
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        // $additional is inserted verbatim, callers have to quote it themselves
        return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
    }
}

if(!function_exists('utf8_strpos')){
    /**
     * This is an Unicode aware replacement for strpos
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     * @param  string  $haystack
     * @param  string  $needle
     * @param  integer $offset   character offset to start searching from
     * @return integer|false character position of the needle, false if not found
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        $comp = 0;      // bytes to add to $offset to compensate multibyte characters
        $length = null; // character position of the last hit

        while (is_null($length) || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false)
                return false;

            // translate the byte position into a character position
            $length = utf8_strlen(substr($haystack, 0, $pos));

            // hit lies before the requested character offset: search again further right
            if ($length < $offset)
                $comp = $pos - $length;
        }

        return $length;
    }
}

if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * ASCII is passed through, code points below 0x100 become decimal
     * entities, everything above becomes a hexadecimal entity.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://www.php.net/manual/en/function.utf8-decode.php
     * @param  string $str
     * @return string
     */
    function utf8_tohtml ($str) {
        $ret = '';
        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp < 0x80)
                $ret .= chr($cp);
            elseif ($cp < 0x100)
                $ret .= "&#$cp;";
            else
                $ret .= '&#x'.dechex($cp).';';
        }
        return $ret;
    }
}

if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * Convert any &#..; entity to a codepoint,
     * The entities flag defaults to only decoding numeric entities.
     * Pass HTML_ENTITIES and named entities, including &amp; &lt; etc.
     * are handled as well. Avoids the problem that would occur if you
     * had to decode "&#38;&amp;#38;"
     *
     * unhtmlspecialchars(utf8_unhtml($s)) -> "&&"
     * utf8_unhtml(unhtmlspecialchars($s)) -> "&&#38;"
     * what it should be                   -> "&&#38;"
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @param  string  $str      UTF-8 encoded string
     * @param  boolean $entities Flag controlling decoding of named entities.
     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
*/
    function utf8_unhtml($str, $entities=null) {
        static $decoder = null;
        if (is_null($decoder))
            $decoder = new utf8_entity_decoder();
        if (is_null($entities))
            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
                                         'utf8_decode_numeric', $str);
        else
            return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                                         array(&$decoder, 'decode'), $str);
    }
}

if(!function_exists('utf8_decode_numeric')){
    /**
     * Decodes numeric HTML entities to their correct UTF-8 characters
     *
     * Expects the match array produced by the patterns in utf8_unhtml():
     * index 2 holds the optional hex marker, index 3 the digits.
     *
     * @param  array  $ent matches of a numeric entity
     * @return string
     */
    function utf8_decode_numeric($ent) {
        switch ($ent[2]) {
            case 'X':
            case 'x':
                $cp = hexdec($ent[3]);
                break;
            default:
                $cp = intval($ent[3]);
                break;
        }
        return unicode_to_utf8(array($cp));
    }
}

if(!class_exists('utf8_entity_decoder')){
    /**
     * Encapsulate HTML entity decoding tables
     */
    class utf8_entity_decoder {
        /** @var array maps '&name;' to the UTF-8 encoded character */
        var $table;

        /**
         * Initializes the decoding tables
         *
         * The table is requested in UTF-8 explicitly. Relying on the default
         * charset and converting each entry with ord() only looked at the
         * first byte, which is wrong as soon as the table is multibyte.
         */
        function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8');
            $this->table = array_flip($table);
        }

        /**
         * Wrapper around unicode_to_utf8()
         *
         * No longer used to build the table, kept for callers that rely on it.
         *
         * @param  string $c a single byte character
         * @return mixed
         */
        function makeutf8($c) {
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * Decodes any HTML entity to it's correct UTF-8 char equivalent
         *
         * Unknown named entities are returned unchanged.
         *
         * @param  array  $ent matches of an entity
         * @return string
         */
        function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            } elseif (array_key_exists($ent[0],$this->table)) {
                return $this->table[$ent[0]];
            } else {
                return $ent[0];
            }
        }
    }
}

if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters.
* Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING. Without $strict, illegal code points are passed
     * through to the output and stray bytes are skipped.
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param  string  $str    UTF-8 encoded string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     * @see    unicode_to_utf8
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0;     // cached expected number of octets after the current octet
                         // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0;     // cached Unicode character
        $mBytes = 1;     // cached expected number of octets in the current sequence

        $out = array();

        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {

            // square brackets: the curly brace offset syntax is gone in PHP 8
            $in = ord($str[$i]);

            if ( $mState == 0) {

                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in))) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & ($in))) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & ($in))) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & ($in))) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & ($in))) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & ($in))) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Illegal sequence identifier '.
                                'in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );
                    return false;

                }

            } else {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in))) {

                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState) {

                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                        'utf8_to_unicode: Illegal sequence or codepoint '.
                                            'in UTF-8 at byte '.$i,
                                        E_USER_WARNING
                                    );

                                return false;
                            }

                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Incomplete multi-octet '.
                            '   sequence in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );

                    return false;
                }
            }
        }

        // a sequence that is still open when the input ends is invalid as well
        if($strict && $mState != 0) {
            trigger_error(
                    'utf8_to_unicode: Incomplete multi-octet '.
                    '   sequence in UTF-8 at byte '.$len,
                    E_USER_WARNING
                );

            return false;
        }

        return $out;
    }
}

if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
*
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING.
     * Without $strict such values are silently skipped.
     *
     * Note: the string is built by plain concatenation. No output buffer is
     * opened, so returning early in strict mode cannot leave one behind.
     *
     * @param  array   $arr    of unicode code points representing a string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed UTF-8 string or false if array contains invalid code points
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    utf8_to_unicode
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';

        $result = '';

        foreach ($arr as $k => $cp) {

            if ( ($cp >= 0) && ($cp <= 0x007f) ) {
                // ASCII range (including control chars)

                $result .= chr($cp);

            } else if ($cp <= 0x07ff) {
                // 2 byte sequence

                $result .= chr(0xc0 | ($cp >> 6));
                $result .= chr(0x80 | ($cp & 0x003f));

            } else if($cp == 0xFEFF) {
                // Byte order mark (skip)

                // nop -- zap the BOM

            } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {
                // Test for illegal surrogates

                // found a surrogate
                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                            'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                        );
                    return false;
                }

            } else if ($cp <= 0xffff) {
                // 3 byte sequence

                $result .= chr(0xe0 | ($cp >> 12));
                $result .= chr(0x80 | (($cp >> 6) & 0x003f));
                $result .= chr(0x80 | ($cp & 0x003f));

            } else if ($cp <= 0x10ffff) {
                // 4 byte sequence

                $result .= chr(0xf0 | ($cp >> 18));
                $result .= chr(0x80 | (($cp >> 12) & 0x3f));
                $result .= chr(0x80 | (($cp >> 6) & 0x3f));
                $result .= chr(0x80 | ($cp & 0x3f));

            } elseif($strict) {

                trigger_error(
                    'unicode_to_utf8: Codepoint out of Unicode range '.
                        'at index: '.$k.', value: '.$cp,
                    E_USER_WARNING
                    );

                // out of range
                return false;
            }
        }

        return $result;
    }
}

if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_convert_encoding() when mb_string is available. The fallback
     * writes code points from U+10000 to U+10FFFF as surrogate pairs; values
     * beyond that range are still packed as a single 16 bit unit.
     *
     * @param  string $str UTF-8 encoded string
     * @param  bool   $bom prepend the big endian byte order mark?
     * @return string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if($cp >= 0x10000 && $cp <= 0x10FFFF){
                // astral plane: split into high and low surrogate
                $cp -= 0x10000;
                $out .= pack('n', 0xD800 | ($cp >> 10));
                $out .= pack('n', 0xDC00 | ($cp & 0x3FF));
            }else{
                $out .= pack('n',$cp);
            }
        }
        return $out;
    }
}

if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
*
     * Converts a UTF-16BE encoded string to UTF-8. A high surrogate that is
     * directly followed by a low surrogate is combined into one code point;
     * unpaired surrogates are handed to unicode_to_utf8() unchanged.
     *
     * @param  string $str UTF-16BE encoded string (without BOM handling)
     * @return string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if(!is_array($units)) return '';

        $uni = array();
        $high = 0; // pending high surrogate, 0 when none is waiting

        foreach($units as $unit){
            if($high){
                if($unit >= 0xDC00 && $unit <= 0xDFFF){
                    $uni[] = 0x10000 + (($high - 0xD800) << 10) + ($unit - 0xDC00);
                    $high = 0;
                    continue;
                }
                // not followed by a low surrogate: keep it as it was
                $uni[] = $high;
                $high = 0;
            }

            if($unit >= 0xD800 && $unit <= 0xDBFF){
                $high = $unit;
            }else{
                $uni[] = $unit;
            }
        }
        if($high) $uni[] = $high;

        return unicode_to_utf8($uni);
    }
}

if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * The string is walked with an offset instead of being cut down after
     * every match, so the input is not copied once per character.
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    http://www.w3.org/International/questions/qa-forms-utf-8
     * @param  string $str     to search
     * @param  string $replace to replace bad bytes with (defaults to '', i.e. bad bytes are removed) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
            '([\x00-\x7F]'.                          // ASCII (including control chars)
            '|[\xC2-\xDF][\x80-\xBF]'.               // non-overlong 2-byte
            '|\xE0[\xA0-\xBF][\x80-\xBF]'.           // excluding overlongs
            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    // straight 3-byte
            '|\xED[\x80-\x9F][\x80-\xBF]'.           // excluding surrogates
            '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        // planes 1-3
            '|[\xF1-\xF3][\x80-\xBF]{3}'.            // planes 4-15
            '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        // plane 16
            '|(.{1}))';                              // invalid byte
        $pattern = '/'.$UTF8_BAD.'/S';

        $result = '';
        $offset = 0;
        $len = strlen($str);

        // every byte is matched by one of the alternatives, so each match
        // starts exactly at $offset
        while ($offset < $len && preg_match($pattern, $str, $matches, 0, $offset)) {
            if ( !isset($matches[2])) {
                $result .= $matches[0];
            } else {
                // group 2 only takes part in the match for an invalid byte
                $result .= $replace;
            }
            $offset += strlen($matches[0]);
        }

        return $result;
    }
}

if(!function_exists('utf8_correctIdx')){
    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * @param string $str  utf8 character string
     * @param int    $i    byte index into $str
     * @param bool   $next direction to search for boundary,
     *                       false = up (current character)
     *                       true = down (next character)
     *
     * @return int            byte index into $str now pointing to a utf8 character boundary
     *
     * @author       chris smith <chris@jalakai.co.uk>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $limit = strlen($str);
        if ($i>=$limit) return $limit;

        // bytes of the form 10bbbbbb are continuation bytes, skip over them
        if ($next) {
            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
        }

        return $i;
    }
}

// only needed if no mb_string available
if(!UTF8_MBSTRING){
    /**
     * UTF-8 Case lookup table
     *
     * This lookup table maps lower case letters to their corresponding
     * upper case letter in UTF-8
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_LOWER_TO_UPPER;
    if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
            "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q",
            "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G",
            "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ",
            "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ",
"ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 994 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 995 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 996 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 997 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 998 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 999 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 1000 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 1001 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 1002 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 1003 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 1004 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 1005 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 1006 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 1007 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 1008 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 1009 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 1010 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 1011 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 1012 "ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 1013 
"ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 1014 "ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 1015 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 1016 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 1017 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 1018 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 1019 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 1020 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1021 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1022 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1023 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1024 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1025 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1026 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1027 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1028 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1029 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1030 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1031 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1032 "ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1033 
"ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1034 "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1035 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1036 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1037 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1038 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1039 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1040 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1041 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1042 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1043 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1044 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1045 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1046 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1047 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1048 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1049 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1050 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1051 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1052 "ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1053 
"á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 1054 "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 1055 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 1056 ); 1057 1058 /** 1059 * UTF-8 Case lookup table 1060 * 1061 * This lookuptable defines the lower case letters to their corresponding 1062 * upper case letter in UTF-8 1063 * 1064 * @author Andreas Gohr <andi@splitbrain.org> 1065 */ 1066 global $UTF8_UPPER_TO_LOWER; 1067 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1068 "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", 1069 "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", 1070 "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", 1071 "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", 1072 "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", 1073 "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", 1074 "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", 1075 "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", 1076 "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", 1077 "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", 1078 "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", 1079 "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ", 1080 "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ", 1081 "Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố", 1082 
"Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ", 1083 "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ", 1084 "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ", 1085 "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ", 1086 "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ", 1087 "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ", 1088 "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ", 1089 "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ", 1090 "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ", 1091 "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ", 1092 "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս", 1093 "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ", 1094 "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ", 1095 "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ", 1096 "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ", 1097 "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ", 1098 "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ", 1099 "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ", 1100 "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ", 1101 "Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ", 1102 
"Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ", 1103 "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ", 1104 "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ", 1105 "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ", 1106 "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п", 1107 "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е", 1108 "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ", 1109 "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ", 1110 "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ", 1111 "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π", 1112 "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ", 1113 "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ", 1114 "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ", 1115 "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ", 1116 "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș", 1117 "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ", 1118 "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ", 1119 "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ", 1120 "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ", 1121 "Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ", 1122 
"Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž", 1123 "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ", 1124 "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ", 1125 "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ", 1126 "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī", 1127 "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė", 1128 "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă", 1129 "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö", 1130 "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì", 1131 "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â", 1132 "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1133 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1134 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1135 ); 1136}; // end of case lookup tables 1137 1138/** 1139 * UTF-8 lookup table for lower case accented letters 1140 * 1141 * This lookuptable defines replacements for accented characters from the ASCII-7 1142 * range. This are lower case letters only. 
1143 * 1144 * @author Andreas Gohr <andi@splitbrain.org> 1145 * @see utf8_deaccent() 1146 */ 1147global $UTF8_LOWER_ACCENTS; 1148if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array( 1149 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', 1150 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', 1151 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', 1152 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', 1153 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', 1154 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', 1155 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', 1156 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', 1157 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', 1158 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', 1159 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', 1160 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', 1161 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', 1162 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', 1163 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', 1164); 1165 1166/** 1167 * UTF-8 lookup table for upper case accented letters 1168 * 1169 * This lookup table defines replacements for accented characters from the ASCII-7 1170 * range. These are upper case letters only. 
1171 * 1172 * @author Andreas Gohr <andi@splitbrain.org> 1173 * @see utf8_deaccent() 1174 */ 1175global $UTF8_UPPER_ACCENTS; 1176if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array( 1177 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', 1178 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', 1179 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', 1180 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', 1181 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', 1182 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', 1183 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', 1184 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', 1185 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', 1186 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', 1187 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', 1188 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', 1189 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', 1190 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', 1191 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', 1192); 1193 1194/** 1195 * UTF-8 array of common special characters 1196 * 1197 * This array should contain all special characters (not a letter or digit) 1198 * defined in the various local charsets - it's not a complete list of non-alphanum 1199 * characters in UTF-8. It's not perfect but should match most cases of special 1200 * chars. 1201 * 1202 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! 
1203 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a 1204 * 1205 * @author Andreas Gohr <andi@splitbrain.org> 1206 * @see utf8_stripspecials() 1207 */ 1208global $UTF8_SPECIAL_CHARS; 1209if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( 1210 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 1211 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 1212 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 1213 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 1214 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 1215 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 1216 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 1217 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 1218 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 1219 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 1220 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 1221 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 1222 0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 1223 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 1224 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1225 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1226 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1227 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1228 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1229 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1230 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 
0x2200, 0x2202, 1231 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1232 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1233 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1234 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1235 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1236 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1237 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1238 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1239 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1240 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1241 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1242 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1243 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1244 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1245 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1246 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1247 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1248 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1249 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1250 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1251 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1252 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1253 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1254 0x27b4, 0x27b5, 0x27b6, 
0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1255 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1256 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1257 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1258 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1259 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1260 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1261 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1262 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1263 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1264 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1265 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1266 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1267 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1268 0xffeb, 0xffec, 0xffed, 0xffee, 1269 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1270 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1271 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1272 0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1273 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1274); 1275 1276// utf8 version of above data 1277global $UTF8_SPECIAL_CHARS2; 1278if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1279 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ �'. 1280 '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. 1281 '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1282 '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. 1283 '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. 1284 '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. 1285 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. 1286 '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. 1287 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. 
1288 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1289 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1290 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1291 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1292 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1293 '➷➸➹➺➻➼➽➾'. 1294 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1295 '�'. 1296 '�ﹼﹽ'. 1297 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1298 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1299 ''. 1300 ' '; 1301 1302/** 1303 * Romanization lookup table 1304 * 1305 * This lookup tables provides a way to transform strings written in a language 1306 * different from the ones based upon latin letters into plain ASCII. 1307 * 1308 * Please note: this is not a scientific transliteration table. It only works 1309 * oneway from nonlatin to ASCII and it works by simple character replacement 1310 * only. Specialities of each language are not supported. 1311 * 1312 * @author Andreas Gohr <andi@splitbrain.org> 1313 * @author Vitaly Blokhin <vitinfo@vitn.com> 1314 * @link http://www.uconv.com/translit.htm 1315 * @author Bisqwit <bisqwit@iki.fi> 1316 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1317 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1318 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1319 * @link http://www.btranslations.com/resources/romanization/korean.asp 1320 * @author Arthit Suriyawongkul <arthit@gmail.com> 1321 * @author Denis Scheither <amorphis@uni-bremen.de> 1322 * @author Eivind Morland <eivind.morland@gmail.com> 1323 */ 1324global $UTF8_ROMANIZATION; 1325if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1326 // scandinavian - differs from what we do in deaccent 1327 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1328 1329 //russian cyrillic 1330 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1331 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1332 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1333 
'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1334 'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1335 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1336 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1337 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1338 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1339 // Ukrainian cyrillic 1340 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1341 // Georgian 1342 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1343 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1344 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1345 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1346 'ჰ'=>'xh', 1347 //Sanskrit 1348 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1349 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1350 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1351 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1352 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1353 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1354 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1355 //Sanskrit diacritics 1356 'Ā'=>'A','Ī'=>'I','Ū'=>'U','Ṛ'=>'R','Ṝ'=>'R','Ṅ'=>'N','Ñ'=>'N','Ṭ'=>'T', 1357 'Ḍ'=>'D','Ṇ'=>'N','Ś'=>'S','Ṣ'=>'S','Ṁ'=>'M','Ṃ'=>'M','Ḥ'=>'H','Ḷ'=>'L','Ḹ'=>'L', 1358 'ā'=>'a','ī'=>'i','ū'=>'u','ṛ'=>'r','ṝ'=>'r','ṅ'=>'n','ñ'=>'n','ṭ'=>'t', 1359 'ḍ'=>'d','ṇ'=>'n','ś'=>'s','ṣ'=>'s','ṁ'=>'m','ṃ'=>'m','ḥ'=>'h','ḷ'=>'l','ḹ'=>'l', 1360 //Hebrew 1361 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1362 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1363 
'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1364 'ש'=>'sh','ת'=>'t', 1365 //Arabic 1366 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1367 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1368 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1369 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1370 1371 // Japanese characters (last update: 2008-05-09) 1372 1373 // Japanese hiragana 1374 1375 // 3 character syllables, っ doubles the consonant after 1376 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1377 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1378 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1379 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1380 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1381 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1382 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1383 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1384 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1385 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1386 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1387 1388 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1389 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1390 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1391 1392 // 2 character syllables - normal 1393 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1394 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1395 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1396 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1397 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1398 
'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1399 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1400 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1401 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1402 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1403 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1404 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1405 'うぇ'=>'we','うぃ'=>'wi', 1406 'いぇ'=>'ye', 1407 1408 // 2 character syllables, っ doubles the consonant after 1409 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1410 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1411 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1412 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1413 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1414 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1415 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1416 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1417 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1418 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1419 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1420 1421 // 1 character syllabels 1422 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1423 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1424 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1425 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1426 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1427 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1428 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1429 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1430 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1431 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1432 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1433 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1434 
'わ'=>'wa','を'=>'wo', 1435 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1436 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1437 // old characters 1438 'ゑ'=>'we','ゐ'=>'wi', 1439 1440 // convert what's left (probably only kicks in when something's missing above) 1441 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1442 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1443 1444 // never seen one of those (disabled for the moment) 1445 // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1446 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1447 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1448 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1449 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1450 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1451 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1452 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1453 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1454 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1455 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1456 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1457 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1458 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1459 1460 // 'spare' characters from other romanization systems 1461 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1462 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1463 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1464 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1465 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1466 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1467 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1468 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1469 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1470 
//'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1471 1472 1473 // Japanese katakana 1474 1475 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1476 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1477 'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1478 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1479 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1480 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1481 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1482 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1483 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1484 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1485 'ッティー'=>'ttii', 1486 'ッヂィー'=>'ddii', 1487 1488 // 3 character syllables - doubled vowels 1489 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1490 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1491 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1492 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1493 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1494 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1495 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1496 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1497 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1498 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1499 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1500 
'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1501 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1502 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1503 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1504 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1505 'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1506 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1507 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1508 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1509 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1510 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1511 'ウェー'=>'wee','ウィー'=>'wii', 1512 'イェー'=>'yee', 1513 'ティー'=>'tii', 1514 'ヂィー'=>'dii', 1515 1516 // 3 character syllables - doubled consonants 1517 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1518 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1519 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1520 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1521 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1522 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1523 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1524 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1525 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1526 'ッティ'=>'tti', 1527 'ッヂィ'=>'ddi', 1528 1529 // 3 character syllables - doubled vowel and consonants 1530 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1531 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1532 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1533 
'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1534 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1535 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1536 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1537 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1538 'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1539 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1540 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1541 1542 // 2 character syllables - normal 1543 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1544 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1545 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1546 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1547 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1548 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1549 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1550 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1551 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1552 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1553 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1554 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1555 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1556 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1557 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1558 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1559 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1560 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1561 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1562 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1563 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1564 
'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1565 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1566 'ウェ'=>'we','ウィ'=>'wi', 1567 'イェ'=>'ye', 1568 'ティ'=>'ti', 1569 'ヂィ'=>'di', 1570 1571 // 2 character syllables - doubled vocal 1572 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1573 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 1574 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1575 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1576 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1577 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1578 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1579 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1580 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1581 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1582 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1583 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1584 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1585 'ワー'=>'waa','ヲー'=>'woo', 1586 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1587 'ヵー'=>'kaa','ヶー'=>'kee', 1588 // old characters 1589 'ヱー'=>'wee','ヰー'=>'wii', 1590 1591 // seperate katakana 'n' 1592 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1593 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1594 1595 // 2 character syllables - doubled consonants 1596 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1597 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1598 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1599 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1600 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1601 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1602 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1603 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1604 
'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1605 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1606 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1607 1608 // 1 character syllables 1609 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1610 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1611 'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1612 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1613 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1614 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1615 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1616 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1617 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1618 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1619 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1620 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1621 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1622 'ワ'=>'wa','ヲ'=>'wo', 1623 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1624 'ヵ'=>'ka','ヶ'=>'ke', 1625 // old characters 1626 'ヱ'=>'we','ヰ'=>'wi', 1627 1628 // convert what's left (probably only kicks in when something's missing above) 1629 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1630 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1631 1632 // special characters 1633 '・'=>'_','、'=>'_', 1634 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1635 1636 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1637 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1638 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1639 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1640 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1641 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1642 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1643 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1644 // 
'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1645 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1646 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1647 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1648 1649 // "Greeklish" 1650 'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1651 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1652 1653 // Thai 1654 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1655 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1656 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1657 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1658 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1659 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1660 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1661 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1662 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1663 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1664 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1665 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1666 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1667 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1668 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1669 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1670 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1671 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1672 1673 // Korean 1674 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1675 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1676 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1677 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1678 
'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1679 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1680); 1681 1682 1683