<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * Check for mb_string support.
 *
 * UTF8_MBSTRING ends up as 1 when mb_substr() exists and UTF8_NOMBSTRING has
 * not been defined, and as 0 otherwise. A value defined before this file is
 * loaded is left untouched.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
     * @param  string $str string to inspect
     * @return bool   false as soon as one byte outside 0x00-0x7F is found
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $str input string
     * @return string the input with every byte >= 0x80 removed
     */
    function utf8_strip($str){
        $ascii = '';
        $len = strlen($str);
        for($i=0; $i<$len; $i++){
            // square brackets: the curly brace offset syntax is a parse error on PHP 8
            if(ord($str[$i]) < 128){
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * Only the byte structure is verified: every lead byte has to be followed
     * by the announced number of 10bbbbbb continuation bytes. Lead bytes of
     * 5 and 6 byte sequences are accepted, and decoded values are not
     * inspected (no overlong or surrogate detection).
     *
     * @author <bmorel@ssi.fr>
     * @link   http://www.php.net/manual/en/function.utf8-encode.php
     * @param  string $Str string to inspect
     * @return bool   true if the byte structure looks like UTF-8
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue; # 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
            else return false; # Does not match any model

            for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_basename')){
    /**
     * A locale independent basename() implementation
     *
     * works around a bug in PHP's basename() implementation
     *
     * Both '/' and '\' are treated as directory separators.
     *
     * @see basename()
     * @link   https://bugs.php.net/bug.php?id=37738
     * @param  string $path   A path
     * @param  string $suffix If the name component ends in suffix this will also be cut off
     * @return string
     */
    function utf8_basename($path, $suffix=''){
        $path = trim($path,'\\/');
        $rpos = max(strrpos($path, '/'), strrpos($path, '\\'));
        if($rpos) $path = substr($path, $rpos+1);

        $suflen = strlen($suffix);
        // strict comparison: with == two numeric looking strings such as
        // '100' and '1e2' would be considered equal
        if($suflen && (substr($path, -$suflen) === (string)$suffix)){
            $path = substr($path, 0, -$suflen);
        }

        return $path;
    }
}
if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * utf8_decode() converts characters that are not in ISO-8859-1
     * to '?', which, for the purpose of counting, is alright - It's
     * even faster than mb_strlen.
     *
     * NOTE: utf8_decode() is deprecated as of PHP 8.2.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @see    utf8_decode()
     * @param  string $string UTF-8 encoded string
     * @return int    number of characters
     */
    function utf8_strlen($string){
        return strlen(utf8_decode($string));
    }
}

if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     * @param  string $str
     * @param  int    $offset number of UTF-8 characters offset (from left)
     * @param  int    $length (optional) length in UTF-8 characters from offset
     * @return string the selected part, '' when nothing can be selected
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            if( $length === null ){
                return mb_substr($str, $offset);
            }else{
                return mb_substr($str, $offset, $length);
            }
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;
        $offset = (int)$offset;
        if (!is_null($length)) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = strlen(utf8_decode($str)); // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^'; // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if (is_null($length)) {
            $length_pattern = '(.*)$'; // the rest of the string
        } else {

            if (!isset($strlen)) $strlen = strlen(utf8_decode($str)); // see notes
            if ($offset > $strlen) return ''; // another trivial case

            if ($length > 0) {

                // reduce any length that would go past the end of the string
                $length = min($strlen-$offset, $length);

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {

                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // -ve length requires ... capture everything except a group of -length characters
                // anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}

if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * The result is built from the first $start characters (only when $start
     * is positive), the replacement, and everything from character
     * $start+$length onwards. With the default $length of 0 the replacement
     * is inserted at $start.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     * @param  string $string      UTF-8 encoded input
     * @param  string $replacement text to put in place
     * @param  int    $start       character offset where the replacement starts
     * @param  int    $length      number of characters to replace
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        $ret = '';
        if($start>0) $ret .= utf8_substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= utf8_substr($string, $start+$length);
        return $ret;
    }
}

if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     * @param  string $str
     * @param  string $charlist characters to strip, plain ltrim() is used when empty
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if($charlist == '') return ltrim($str);

        // quote charlist for use in a character class; '^' has to be quoted
        // as well, otherwise a leading '^' would negate the class
        $charlist = preg_replace('!([\\\\\\-\\]\\[/\\^])!','\\\${1}',$charlist);

        return preg_replace('/^['.$charlist.']+/u','',$str);
    }
}

if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     * @param  string $str
     * @param  string $charlist characters to strip, plain rtrim() is used when empty
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if($charlist == '') return rtrim($str);

        // quote charlist for use in a character class; '^' has to be quoted
        // as well, otherwise a leading '^' would negate the class
        $charlist = preg_replace('!([\\\\\\-\\]\\[/\\^])!','\\\${1}',$charlist);

        return preg_replace('/['.$charlist.']+$/u','',$str);
    }
}
if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     * Without a charlist the call is handed to PHP's own trim(); otherwise
     * the right side is trimmed first, then the left side.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if($charlist != ''){
            $rightTrimmed = utf8_rtrim($str,$charlist);
            return utf8_ltrim($rightTrimmed,$charlist);
        }

        return trim($str);
    }
}

if(!function_exists('utf8_strtolower')){
    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available, the $UTF8_UPPER_TO_LOWER
     * lookup table otherwise
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     * @param  string $string
     * @return string
     */
    function utf8_strtolower($string){
        if(!UTF8_MBSTRING){
            global $UTF8_UPPER_TO_LOWER;
            return strtr($string,$UTF8_UPPER_TO_LOWER);
        }

        return mb_strtolower($string,'utf-8');
    }
}

if(!function_exists('utf8_strtoupper')){
    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available, the $UTF8_LOWER_TO_UPPER
     * lookup table otherwise
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtolower()
     * @param  string $string
     * @return string
     */
    function utf8_strtoupper($string){
        if(!UTF8_MBSTRING){
            global $UTF8_LOWER_TO_UPPER;
            return strtr($string,$UTF8_LOWER_TO_UPPER);
        }

        return mb_strtoupper($string,'utf-8');
    }
}

if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        $chars = utf8_strlen($str);

        if($chars == 0) return '';
        if($chars == 1) return utf8_strtoupper($str);

        // split into the first character and the remainder
        preg_match('/^(.{1})(.*)$/us', $str, $parts);
        return utf8_strtoupper($parts[1]).$parts[2];
    }
}
if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first char of each word uppercase
     * @see    http://www.php.net/ucwords
     */
    function utf8_ucwords($str) {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds, carriage returns and spaces
        // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
    }

    /**
     * Callback function for preg_replace_callback call in utf8_ucwords
     * You don't need to call this yourself
     *
     * @author Harry Fuecks
     * @param  array $matches matches corresponding to a single word:
     *                        [0] whitespace + word, [2] the whitespace,
     *                        [3] first character of the word
     * @return string with first char of the word in uppercase
     * @see    utf8_ucwords
     * @see    utf8_strtoupper
     */
    function utf8_ucwords_callback($matches) {
        $leadingws = $matches[2];
        $ucfirst = utf8_strtoupper($matches[3]);

        // Cut off exactly the whitespace the pattern matched. A plain ltrim()
        // does not strip form feeds (which the pattern treats as whitespace)
        // but does strip NUL bytes (which the pattern treats as word
        // characters), so the wrong character would get replaced.
        $word = substr($matches[0], strlen($leadingws));

        $ucword = utf8_substr_replace($word,$ucfirst,0,1);
        return $leadingws . $ucword;
    }
}
if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters. Default is to deaccent both cases ($case = 0)
     *
     * The replacements come from the global lookup tables
     * $UTF8_LOWER_ACCENTS and $UTF8_UPPER_ACCENTS.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @param  int    $case
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        $doLower = ($case <= 0);
        $doUpper = ($case >= 0);

        if($doLower){
            global $UTF8_LOWER_ACCENTS;
            $string = strtr($string,$UTF8_LOWER_ACCENTS);
        }
        if($doUpper){
            global $UTF8_UPPER_ACCENTS;
            $string = strtr($string,$UTF8_UPPER_ACCENTS);
        }

        return $string;
    }
}

if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * Pure ASCII input is returned untouched, everything else is run
     * through the global $UTF8_ROMANIZATION lookup table.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @return string
     */
    function utf8_romanize($string){
        if(!utf8_isASCII($string)){
            global $UTF8_ROMANIZATION;
            $string = strtr($string,$UTF8_ROMANIZATION);
        }

        return $string;
    }
}

if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the set of
     * stripped chars (taken from the global $UTF8_SPECIAL_CHARS2)
     *
     * The quoted special chars are computed on the first call and kept in a
     * static variable for later calls.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string     The UTF8 string to strip of special chars
     * @param  string $repl       Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS2;

        static $specials = null;
        if($specials === null){
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        $pattern = '/['.$additional.'\x00-\x19'.$specials.']/u';
        return preg_replace($pattern,$repl,$string);
    }
}

if(!function_exists('utf8_strpos')){
    /**
     * This is an Unicode aware replacement for strpos
     *
     * The byte position found by strpos() is converted to a character
     * position; the search is repeated further right until that character
     * position is not smaller than $offset.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     * @param  string  $haystack
     * @param  string  $needle
     * @param  integer $offset   character offset to start searching at
     * @return integer|false character position of the needle, false if not found
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        $comp = 0;

        do {
            $pos = strpos($haystack, $needle, $offset + $comp);
            if ($pos === false) return false;

            // characters in front of the match = character position of the match
            $length = utf8_strlen(substr($haystack, 0, $pos));

            // match lies in front of the wanted offset: compensate for the
            // multibyte characters seen so far and search again
            if ($length < $offset) $comp = $pos - $length;
        } while ($length < $offset);

        return $length;
    }
}

if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * ASCII stays as it is, code points below 0x100 become decimal
     * entities, everything above becomes a hexadecimal entity.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://www.php.net/manual/en/function.utf8-decode.php
     * @param  string $str UTF-8 encoded string
     * @return string
     */
    function utf8_tohtml ($str) {
        $ret = '';
        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp >= 0x100) {
                $ret .= '&#x'.dechex($cp).';';
            } elseif ($cp >= 0x80) {
                $ret .= "&#$cp;";
            } else {
                $ret .= chr($cp);
            }
        }
        return $ret;
    }
}
if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * Numeric entities (decimal and hexadecimal) are always decoded. Named
     * entities are decoded as well when $entities is anything but null
     * (e.g. pass HTML_ENTITIES). Doing both in a single pass makes sure that
     * text produced by decoding one entity is never decoded a second time.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @param  string $str      UTF-8 encoded string
     * @param  mixed  $entities Flag controlling decoding of named entities.
     * @return string UTF-8 encoded string with numeric (and named) entities replaced.
     */
    function utf8_unhtml($str, $entities=null) {
        if (is_null($entities)) {
            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
                                         'utf8_decode_numeric', $str);
        }

        // the lookup table is only needed (and built) for named entities
        static $decoder = null;
        if (is_null($decoder)) {
            $decoder = new utf8_entity_decoder();
        }
        return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                                     array($decoder, 'decode'), $str);
    }
}

if(!function_exists('utf8_decode_numeric')){
    /**
     * Decodes numeric HTML entities to their correct UTF-8 characters
     *
     * @param  array $ent regexp match of a numeric entity: [2] holds 'x' or
     *                    'X' for hexadecimal entities, [3] the digits
     * @return string
     */
    function utf8_decode_numeric($ent) {
        switch ($ent[2]) {
            case 'X':
            case 'x':
                $cp = hexdec($ent[3]);
                break;
            default:
                $cp = intval($ent[3]);
                break;
        }
        return unicode_to_utf8(array($cp));
    }
}

if(!class_exists('utf8_entity_decoder')){
    /**
     * Encapsulate HTML entity decoding tables
     */
    class utf8_entity_decoder {
        /** @var array named entity (e.g. '&eacute;') => UTF-8 character */
        public $table;

        /**
         * Initializes the decoding tables
         *
         * The translation table is requested in ISO-8859-1 explicitly:
         * makeutf8() reads each character as one single byte code point.
         * With PHP's default charset (UTF-8 since PHP 5.4) the table would
         * hold multibyte characters and only their first byte would be used.
         */
        public function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1');
            $table = array_flip($table);
            $this->table = array_map(array($this,'makeutf8'), $table);
        }

        /**
         * Wrapper around unicode_to_utf8()
         *
         * @param  string $c a single byte (ISO-8859-1) character
         * @return mixed  the character encoded as UTF-8
         */
        public function makeutf8($c) {
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * Decodes any HTML entity to its correct UTF-8 char equivalent
         *
         * @param  array $ent regexp match of an entity: [0] is the whole
         *                    entity, [1] is '#' for numeric entities
         * @return string the decoded character, unknown entities are
         *                returned unchanged
         */
        public function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            } elseif (array_key_exists($ent[0],$this->table)) {
                return $this->table[$ent[0]];
            } else {
                return $ent[0];
            }
        }
    }
}
if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING
     *
     * Without $strict, bytes that fit nowhere are skipped and illegal
     * sequences (overlong forms, surrogates, out of range values) are passed
     * on as decoded. A multi-octet sequence cut off by the end of the string
     * is dropped without an error in both modes.
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param  string  $str    UTF-8 encoded string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     * @see    unicode_to_utf8
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0;     // cached expected number of octets after the current octet
                         // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0;     // cached Unicode character
        $mBytes = 1;     // cached expected number of octets in the current sequence

        $out = array();

        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {

            // square brackets: the curly brace offset syntax is a parse error on PHP 8
            $in = ord($str[$i]);

            if ( $mState == 0) {

                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in))) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & ($in))) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & ($in))) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & ($in))) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & ($in))) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & ($in))) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Illegal sequence identifier '.
                                'in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );
                    return false;

                }

            } else {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in))) {

                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState) {

                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                        'utf8_to_unicode: Illegal sequence or codepoint '.
                                            'in UTF-8 at byte '.$i,
                                        E_USER_WARNING
                                    );

                                return false;
                            }

                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Incomplete multi-octet '.
                            '   sequence in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );

                    return false;
                }
            }
        }
        return $out;
    }
}
if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING.
     * Without $strict such values are skipped.
     *
     * The string is built by plain concatenation. No output buffer is
     * opened, so bailing out in strict mode cannot leave a buffer behind.
     *
     * @param  array   $arr    of unicode code points representing a string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed UTF-8 string or false if array contains invalid code points
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    utf8_to_unicode
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';

        $out = '';

        foreach ($arr as $k => $cp) {

            if ($cp < 0 || $cp > 0x10ffff) {
                # out of range (negative values can't be encoded either)

                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Codepoint out of Unicode range '.
                            'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                        );
                    return false;
                }

            } else if ($cp <= 0x007f) {
                # ASCII range (including control chars)

                $out .= chr($cp);

            } else if ($cp <= 0x07ff) {
                # 2 byte sequence

                $out .= chr(0xc0 | ($cp >> 6));
                $out .= chr(0x80 | ($cp & 0x003f));

            } else if ($cp == 0xFEFF) {
                # Byte order mark (skip)

            } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {
                # Test for illegal surrogates

                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                            'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                        );
                    return false;
                }

            } else if ($cp <= 0xffff) {
                # 3 byte sequence

                $out .= chr(0xe0 | ($cp >> 12));
                $out .= chr(0x80 | (($cp >> 6) & 0x003f));
                $out .= chr(0x80 | ($cp & 0x003f));

            } else {
                # 4 byte sequence

                $out .= chr(0xf0 | ($cp >> 18));
                $out .= chr(0x80 | (($cp >> 12) & 0x3f));
                $out .= chr(0x80 | (($cp >> 6) & 0x3f));
                $out .= chr(0x80 | ($cp & 0x3f));
            }
        }

        return $out;
    }
}

if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_string if available. The fallback encodes the code points
     * returned by utf8_to_unicode() itself and writes code points above
     * 0xFFFF as surrogate pairs.
     *
     * @param  string $str UTF-8 encoded string (taken by reference, not modified)
     * @param  bool   $bom prepend the big endian byte order mark?
     * @return string UTF-16BE encoded string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if($cp > 0xFFFF){
                // split into high and low surrogate
                $cp -= 0x10000;
                $out .= pack('n2', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            }else{
                $out .= pack('n',$cp);
            }
        }
        return $out;
    }
}
if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * A high surrogate directly followed by a low surrogate is combined into
     * the code point it encodes. Unpaired surrogates are handed on as they
     * are and left to unicode_to_utf8().
     *
     * @param  string $str UTF-16BE encoded string (taken by reference, not modified)
     * @return string UTF-8 encoded string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if (!is_array($units)) return '';

        $units = array_values($units);
        $count = count($units);
        $uni   = array();

        for ($i = 0; $i < $count; $i++) {
            $unit = $units[$i];

            if ($unit >= 0xD800 && $unit <= 0xDBFF && ($i + 1) < $count &&
                $units[$i+1] >= 0xDC00 && $units[$i+1] <= 0xDFFF) {
                // surrogate pair
                $uni[] = 0x10000 + (($unit - 0xD800) << 10) + ($units[++$i] - 0xDC00);
            } else {
                $uni[] = $unit;
            }
        }

        return unicode_to_utf8($uni);
    }
}

if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    http://www.w3.org/International/questions/qa-forms-utf-8
     * @param  string $str     to search
     * @param  string $replace to replace bad bytes with (defaults to '', which
     *                         removes them) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
            '([\x00-\x7F]'.                          # ASCII (including control chars)
            '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
            '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
            '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
            '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
            '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
            '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
            '|(.{1}))';                              # invalid byte

        $result = '';
        $len    = strlen($str);
        $pos    = 0;

        // walk through the string by offset instead of cutting the consumed
        // part off again and again
        while ($pos < $len && preg_match('/'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
            // group 2 only takes part in the match for an invalid byte
            if ( !isset($matches[2])) {
                $result .= $matches[0];
            } else {
                $result .= $replace;
            }
            $pos += strlen($matches[0]);
        }

        return $result;
    }
}

if(!function_exists('utf8_correctIdx')){
    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * @param  string $str  utf8 character string
     * @param  int    $i    byte index into $str
     * @param  bool   $next direction to search for boundary,
     *                      false = up (current character)
     *                      true = down (next character)
     *
     * @return int byte index into $str now pointing to a utf8 character boundary
     *
     * @author chris smith <chris@jalakai.co.uk>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $limit = strlen($str);
        if ($i>=$limit) return $limit;

        // bytes of the form 10bbbbbb are continuation bytes, step over them
        if ($next) {
            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
        }

        return $i;
    }
}
"ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 995 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 996 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 997 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 998 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 999 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 1000 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 1001 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 1002 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 1003 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 1004 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 1005 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 1006 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 1007 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 1008 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 1009 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 1010 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 1011 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 1012 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 1013 "ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 1014 
"ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 1015 "ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 1016 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 1017 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 1018 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 1019 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 1020 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 1021 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1022 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1023 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1024 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1025 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1026 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1027 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1028 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1029 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1030 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1031 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1032 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1033 "ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1034 
"ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1035 "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1036 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1037 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1038 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1039 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1040 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1041 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1042 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1043 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1044 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1045 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1046 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1047 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1048 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1049 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1050 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1051 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1052 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1053 "ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1054 
"á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T",
        "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J",
        "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A"
    );

    /**
     * UTF-8 case lookup table (upper case to lower case)
     *
     * Maps each upper case letter to its corresponding lower case letter,
     * both encoded in UTF-8.
     *
     * Several keys occur more than once because the table is the inverse of
     * the lower-to-upper table, where different lower case forms share one
     * upper case letter. PHP keeps only the LAST value given for a repeated
     * key, so the regular lower case form has to be the last one listed.
     * Two entries violated that and have been removed:
     *   - "Σ"=>"ς" followed "Σ"=>"σ", turning every capital sigma into the
     *     final sigma
     *   - "Μ"=>"µ" followed "Μ"=>"μ", turning Greek capital Mu into the
     *     micro sign
     * The remaining repeated keys already end with their regular form.
     *
     * The table is only assigned when the global is still empty.
     *
     * NOTE(review): in this copy the leading A-Z run and the keys "Dz", "Nj",
     * "Lj", "Dž" and "IJ" are ordinary letter sequences, which makes them
     * redundant with the trailing A-Z run; they may have been dedicated
     * code points originally - confirm against the upstream table.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_UPPER_TO_LOWER;
    if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array (
        "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q",
        "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g",
        "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ",
        "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ",
        "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ",
        "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ",
        "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ",
        "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ",
        "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ",
        "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ",
        "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ",
        "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ",
        "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ",
        "Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố",
        "Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ",
        "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ",
        "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ",
        "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ",
        "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ",
        "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ",
        "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ",
        "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ",
        "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ",
        "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ",
        "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս",
        "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ",
        "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ",
        "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ",
        "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ",
        "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ",
        "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ",
        "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ",
        "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ",
        "Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ",
        "Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ",
        "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ",
        "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ",
        "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ",
        "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п",
        "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е",
        "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ",
        "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ",
        "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ",
        // "Σ"=>"ς" (final sigma) is deliberately absent from the next row: as the
        // later duplicate it replaced "Σ"=>"σ" and lowercased every Σ to ς.
        "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Ρ"=>"ρ","Π"=>"π",
        "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ",
        "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ",
        "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ",
        "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ",
        "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș",
        "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ",
        "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ",
        "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ",
        "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ",
        "Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ",
        "Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž",
        "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ",
        "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ",
        "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ",
        "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī",
        "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė",
        "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă",
        "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö",
        "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì",
        "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â",
        // "Μ"=>"µ" (micro sign) is deliberately absent from the next row: it
        // overrode the regular "Μ"=>"μ" entry listed earlier in this table.
        "Á"=>"á","À"=>"à","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t",
        "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j",
        "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a"
    );
}; // end of case lookup tables

/**
 * UTF-8 lookup table for lower case accented letters
 *
 * This lookup table defines ASCII-7 replacements for accented characters.
 * These are lower case letters only.
 *
 * Each key is a single accented lower case character and each value is its
 * ASCII replacement. Most replacements are one letter; a few are two letters
 * (ß => ss, ö => oe, ä => ae, ü => ue, þ => th, ð => dh, æ => ae).
 *
 * The table is only assigned when the global is still empty, so a value set
 * before this file runs is kept as it is.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see utf8_deaccent()
 */
global $UTF8_LOWER_ACCENTS;
if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array(
  'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
  'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
  'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
  'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o',
  'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
  'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
  'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
  'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
  'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
  'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
  'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
  'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
  'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
  'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e',
);

/**
 * UTF-8 lookup table for upper case accented letters
 *
 * This lookup table defines ASCII-7 replacements for accented characters.
 * These are upper case letters only.
 *
 * Each key is a single accented upper case character and each value is its
 * ASCII replacement. Two-letter replacements capitalise the first letter only
 * (Ö => Oe, Ä => Ae, Ü => Ue, Þ => Th, Ð => Dh, Æ => Ae). The lower case
 * table's 'ß' and 'µ' entries have no counterpart here.
 *
 * The table is only assigned when the global is still empty, so a value set
 * before this file runs is kept as it is.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see utf8_deaccent()
 */
global $UTF8_UPPER_ACCENTS;
if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array(
  'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
  'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
  'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
  'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O',
  'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
  'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
  'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
  'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
  'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
  'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
  'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
  'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
  'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
  'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E',
);

/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets - it's not a complete list of
 * non-alphanumeric characters in UTF-8. It's not perfect but should match
 * most cases of special chars.
 *
 * The control chars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
 *
 * Each entry is the numeric code of one character, written in hexadecimal.
 * The table is only assigned when the global is still empty.
 *
 * NOTE(review): a few entries look inconsistent with the description and
 * should be confirmed before relying on them:
 *   - 0x03c6 is the Greek small letter phi, i.e. a letter
 *   - 0xff09 is listed twice further down
 *   - the final five values (0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf)
 *     look like UTF-8 byte sequences written as integers rather than
 *     character codes
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see utf8_stripspecials()
 */
global $UTF8_SPECIAL_CHARS;
if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array(
    0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
    0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c,
    0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
    0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
    0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
    0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
    0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
    0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
    0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
    0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
    0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
    0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
    0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
    0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
    0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
    0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
    0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
    0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
    0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
    0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
    0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4,
0x2200, 0x2202, 1232 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1233 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1234 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1235 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1236 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1237 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1238 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1239 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1240 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1241 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1242 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1243 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1244 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1245 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1246 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1247 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1248 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1249 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1250 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1251 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1252 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1253 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1254 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1255 0x27b4, 0x27b5, 0x27b6, 
0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1256 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1257 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1258 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1259 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1260 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1261 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1262 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1263 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1264 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1265 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1266 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1267 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1268 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1269 0xffeb, 0xffec, 0xffed, 0xffee, 1270 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1271 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1272 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1273 0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1274 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1275); 1276 1277// utf8 version of above data 1278global $UTF8_SPECIAL_CHARS2; 1279if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1280 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ �'. 1281 '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. 1282 '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1283 '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. 1284 '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. 1285 '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. 1286 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. 1287 '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. 1288 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. 
1289 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1290 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1291 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1292 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1293 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1294 '➷➸➹➺➻➼➽➾'. 1295 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1296 '�'. 1297 '�ﹼﹽ'. 1298 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1299 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1300 ''. 1301 ' '; 1302 1303/** 1304 * Romanization lookup table 1305 * 1306 * This lookup tables provides a way to transform strings written in a language 1307 * different from the ones based upon latin letters into plain ASCII. 1308 * 1309 * Please note: this is not a scientific transliteration table. It only works 1310 * oneway from nonlatin to ASCII and it works by simple character replacement 1311 * only. Specialities of each language are not supported. 1312 * 1313 * @author Andreas Gohr <andi@splitbrain.org> 1314 * @author Vitaly Blokhin <vitinfo@vitn.com> 1315 * @link http://www.uconv.com/translit.htm 1316 * @author Bisqwit <bisqwit@iki.fi> 1317 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1318 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1319 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1320 * @link http://www.btranslations.com/resources/romanization/korean.asp 1321 * @author Arthit Suriyawongkul <arthit@gmail.com> 1322 * @author Denis Scheither <amorphis@uni-bremen.de> 1323 * @author Eivind Morland <eivind.morland@gmail.com> 1324 */ 1325global $UTF8_ROMANIZATION; 1326if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1327 // scandinavian - differs from what we do in deaccent 1328 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1329 1330 //russian cyrillic 1331 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1332 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1333 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1334 
'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1335 'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1336 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1337 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1338 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1339 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1340 // Ukrainian cyrillic 1341 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1342 // Georgian 1343 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1344 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1345 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1346 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1347 'ჰ'=>'xh', 1348 //Sanskrit 1349 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1350 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1351 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1352 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1353 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1354 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1355 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1356 //Sanskrit diacritics 1357 'Ā'=>'A','Ī'=>'I','Ū'=>'U','Ṛ'=>'R','Ṝ'=>'R','Ṅ'=>'N','Ñ'=>'N','Ṭ'=>'T', 1358 'Ḍ'=>'D','Ṇ'=>'N','Ś'=>'S','Ṣ'=>'S','Ṁ'=>'M','Ṃ'=>'M','Ḥ'=>'H','Ḷ'=>'L','Ḹ'=>'L', 1359 'ā'=>'a','ī'=>'i','ū'=>'u','ṛ'=>'r','ṝ'=>'r','ṅ'=>'n','ñ'=>'n','ṭ'=>'t', 1360 'ḍ'=>'d','ṇ'=>'n','ś'=>'s','ṣ'=>'s','ṁ'=>'m','ṃ'=>'m','ḥ'=>'h','ḷ'=>'l','ḹ'=>'l', 1361 //Hebrew 1362 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1363 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1364 
'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1365 'ש'=>'sh','ת'=>'t', 1366 //Arabic 1367 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1368 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1369 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1370 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1371 1372 // Japanese characters (last update: 2008-05-09) 1373 1374 // Japanese hiragana 1375 1376 // 3 character syllables, っ doubles the consonant after 1377 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1378 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1379 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1380 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1381 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1382 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1383 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1384 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1385 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1386 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1387 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1388 1389 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1390 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1391 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1392 1393 // 2 character syllables - normal 1394 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1395 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1396 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1397 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1398 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1399 
'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1400 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1401 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1402 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1403 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1404 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1405 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1406 'うぇ'=>'we','うぃ'=>'wi', 1407 'いぇ'=>'ye', 1408 1409 // 2 character syllables, っ doubles the consonant after 1410 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1411 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1412 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1413 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1414 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1415 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1416 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1417 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1418 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1419 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1420 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1421 1422 // 1 character syllabels 1423 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1424 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1425 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1426 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1427 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1428 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1429 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1430 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1431 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1432 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1433 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1434 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1435 
'わ'=>'wa','を'=>'wo', 1436 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1437 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1438 // old characters 1439 'ゑ'=>'we','ゐ'=>'wi', 1440 1441 // convert what's left (probably only kicks in when something's missing above) 1442 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1443 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1444 1445 // never seen one of those (disabled for the moment) 1446 // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1447 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1448 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1449 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1450 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1451 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1452 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1453 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1454 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1455 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1456 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1457 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1458 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1459 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1460 1461 // 'spare' characters from other romanization systems 1462 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1463 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1464 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1465 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1466 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1467 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1468 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1469 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1470 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1471 
//'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1472 1473 1474 // Japanese katakana 1475 1476 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1477 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1478 'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1479 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1480 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1481 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1482 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1483 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1484 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1485 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1486 'ッティー'=>'ttii', 1487 'ッヂィー'=>'ddii', 1488 1489 // 3 character syllables - doubled vowels 1490 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1491 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1492 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1493 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1494 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1495 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1496 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1497 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1498 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1499 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1500 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1501 
'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1502 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1503 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1504 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1505 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1506 'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1507 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1508 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1509 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1510 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1511 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1512 'ウェー'=>'wee','ウィー'=>'wii', 1513 'イェー'=>'yee', 1514 'ティー'=>'tii', 1515 'ヂィー'=>'dii', 1516 1517 // 3 character syllables - doubled consonants 1518 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1519 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1520 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1521 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1522 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1523 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1524 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1525 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1526 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1527 'ッティ'=>'tti', 1528 'ッヂィ'=>'ddi', 1529 1530 // 3 character syllables - doubled vowel and consonants 1531 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1532 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1533 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1534 
'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1535 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1536 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1537 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1538 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1539 'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1540 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1541 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1542 1543 // 2 character syllables - normal 1544 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1545 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1546 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1547 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1548 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1549 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1550 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1551 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1552 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1553 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1554 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1555 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1556 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1557 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1558 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1559 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1560 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1561 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1562 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1563 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1564 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1565 
'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1566 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1567 'ウェ'=>'we','ウィ'=>'wi', 1568 'イェ'=>'ye', 1569 'ティ'=>'ti', 1570 'ヂィ'=>'di', 1571 1572 // 2 character syllables - doubled vocal 1573 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1574 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 1575 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1576 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1577 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1578 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1579 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1580 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1581 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1582 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1583 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1584 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1585 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1586 'ワー'=>'waa','ヲー'=>'woo', 1587 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1588 'ヵー'=>'kaa','ヶー'=>'kee', 1589 // old characters 1590 'ヱー'=>'wee','ヰー'=>'wii', 1591 1592 // seperate katakana 'n' 1593 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1594 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1595 1596 // 2 character syllables - doubled consonants 1597 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1598 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1599 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1600 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1601 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1602 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1603 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1604 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1605 
'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1606 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1607 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1608 1609 // 1 character syllables 1610 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1611 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1612 'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1613 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1614 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1615 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1616 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1617 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1618 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1619 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1620 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1621 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1622 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1623 'ワ'=>'wa','ヲ'=>'wo', 1624 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1625 'ヵ'=>'ka','ヶ'=>'ke', 1626 // old characters 1627 'ヱ'=>'we','ヰ'=>'wi', 1628 1629 // convert what's left (probably only kicks in when something's missing above) 1630 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1631 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1632 1633 // special characters 1634 '・'=>'_','、'=>'_', 1635 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1636 1637 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1638 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1639 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1640 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1641 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1642 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1643 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1644 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1645 // 
'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1646 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1647 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1648 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1649 1650 // "Greeklish" 1651 'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1652 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1653 1654 // Thai 1655 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1656 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1657 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1658 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1659 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1660 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1661 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1662 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1663 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1664 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1665 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1666 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1667 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1668 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1669 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1670 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1671 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1672 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1673 1674 // Korean 1675 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1676 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1677 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1678 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1679 
'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1680 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1681); 1682 1683 1684