<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * check for mb_string support
 *
 * UTF8_MBSTRING becomes 1 when mb_substr() exists and UTF8_NOMBSTRING has
 * not been defined, 0 otherwise. A value defined before this file is
 * loaded is left untouched.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
     * @param string $str
     * @return bool true unless a byte outside 0x00-0x7F was found
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param string $str
     * @return string $str without any byte >= 0x80
     */
    function utf8_strip($str){
        $ascii = '';
        $len = strlen($str);
        for($i=0; $i<$len; $i++){
            // square brackets: the curly brace offset syntax ($str{$i}) is
            // no longer accepted by PHP 8
            if(ord($str[$i]) <128){
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * Only the byte structure is checked: every lead byte has to be
     * followed by the number of 10bbbbbb continuation bytes it announces.
     * Lead bytes announcing 5 and 6 byte sequences are accepted and the
     * decoded values are not inspected.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://www.php.net/manual/en/function.utf8-encode.php
     * @param string $Str
     * @return bool
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue; # 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
            else return false; # Does not match any model

            for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_basename')){
    /**
     * A locale independent basename() implementation
     *
     * works around a bug in PHP's basename() implementation
     *
     * Both '/' and '\' are treated as directory separators; everything up
     * to and including the last one is removed.
     *
     * @see basename()
     * @link   https://bugs.php.net/bug.php?id=37738
     * @param string $path     A path
     * @param string $suffix   If the name component ends in suffix this will also be cut off
     * @return string
     */
    function utf8_basename($path, $suffix=''){
        $slashrpos = strrpos($path, '/');
        $bslashrpos = strrpos($path, '\\');
        // -1 stands for "separator not found", so $rpos+1 is 0 for a bare name
        $rpos = max($slashrpos === false ? -1 : $slashrpos, $bslashrpos === false ? -1 : $bslashrpos);
        $path = substr($path, $rpos+1);

        $suflen = strlen($suffix);
        if($suflen && (substr($path, -$suflen) == $suffix)){
            $path = substr($path, 0, -$suflen);
        }

        return $path;
    }
}

if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * utf8_decode() converts characters that are not in ISO-8859-1
     * to '?', which, for the purpose of counting, is alright - It's
     * even faster than mb_strlen.
115 * 116 * @author <chernyshevsky at hotmail dot com> 117 * @see strlen() 118 * @see utf8_decode() 119 */ 120 function utf8_strlen($string){ 121 return strlen(utf8_decode($string)); 122 } 123} 124 125if(!function_exists('utf8_substr')){ 126 /** 127 * UTF-8 aware alternative to substr 128 * 129 * Return part of a string given character offset (and optionally length) 130 * 131 * @author Harry Fuecks <hfuecks@gmail.com> 132 * @author Chris Smith <chris@jalakai.co.uk> 133 * @param string $str 134 * @param int $offset number of UTF-8 characters offset (from left) 135 * @param int $length (optional) length in UTF-8 characters from offset 136 * @return mixed string or false if failure 137 */ 138 function utf8_substr($str, $offset, $length = null) { 139 if(UTF8_MBSTRING){ 140 if( $length === null ){ 141 return mb_substr($str, $offset); 142 }else{ 143 return mb_substr($str, $offset, $length); 144 } 145 } 146 147 /* 148 * Notes: 149 * 150 * no mb string support, so we'll use pcre regex's with 'u' flag 151 * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for 152 * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536) 153 * 154 * substr documentation states false can be returned in some cases (e.g. offset > string length) 155 * mb_substr never returns false, it will return an empty string instead. 156 * 157 * calculating the number of characters in the string is a relatively expensive operation, so 158 * we only carry it out when necessary. 
It isn't necessary for +ve offsets and no specified length 159 */ 160 161 // cast parameters to appropriate types to avoid multiple notices/warnings 162 $str = (string)$str; // generates E_NOTICE for PHP4 objects, but not PHP5 objects 163 $offset = (int)$offset; 164 if (!is_null($length)) $length = (int)$length; 165 166 // handle trivial cases 167 if ($length === 0) return ''; 168 if ($offset < 0 && $length < 0 && $length < $offset) return ''; 169 170 $offset_pattern = ''; 171 $length_pattern = ''; 172 173 // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!) 174 if ($offset < 0) { 175 $strlen = strlen(utf8_decode($str)); // see notes 176 $offset = $strlen + $offset; 177 if ($offset < 0) $offset = 0; 178 } 179 180 // establish a pattern for offset, a non-captured group equal in length to offset 181 if ($offset > 0) { 182 $Ox = (int)($offset/65535); 183 $Oy = $offset%65535; 184 185 if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}'; 186 $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})'; 187 } else { 188 $offset_pattern = '^'; // offset == 0; just anchor the pattern 189 } 190 191 // establish a pattern for length 192 if (is_null($length)) { 193 $length_pattern = '(.*)$'; // the rest of the string 194 } else { 195 196 if (!isset($strlen)) $strlen = strlen(utf8_decode($str)); // see notes 197 if ($offset > $strlen) return ''; // another trivial case 198 199 if ($length > 0) { 200 201 $length = min($strlen-$offset, $length); // reduce any length that would go passed the end of the string 202 203 $Lx = (int)($length/65535); 204 $Ly = $length%65535; 205 206 // +ve length requires ... a captured group of length characters 207 if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}'; 208 $length_pattern = '('.$length_pattern.'.{'.$Ly.'})'; 209 210 } else if ($length < 0) { 211 212 if ($length < ($offset - $strlen)) return ''; 213 214 $Lx = (int)((-$length)/65535); 215 $Ly = (-$length)%65535; 216 217 // -ve length requires ... 
capture everything except a group of -length characters 218 // anchored at the tail-end of the string 219 if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}'; 220 $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$'; 221 } 222 } 223 224 if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return ''; 225 return $match[1]; 226 } 227} 228 229if(!function_exists('utf8_substr_replace')){ 230 /** 231 * Unicode aware replacement for substr_replace() 232 * 233 * @author Andreas Gohr <andi@splitbrain.org> 234 * @see substr_replace() 235 */ 236 function utf8_substr_replace($string, $replacement, $start , $length=0 ){ 237 $ret = ''; 238 if($start>0) $ret .= utf8_substr($string, 0, $start); 239 $ret .= $replacement; 240 $ret .= utf8_substr($string, $start+$length); 241 return $ret; 242 } 243} 244 245if(!function_exists('utf8_ltrim')){ 246 /** 247 * Unicode aware replacement for ltrim() 248 * 249 * @author Andreas Gohr <andi@splitbrain.org> 250 * @see ltrim() 251 * @param string $str 252 * @param string $charlist 253 * @return string 254 */ 255 function utf8_ltrim($str,$charlist=''){ 256 if($charlist == '') return ltrim($str); 257 258 //quote charlist for use in a characterclass 259 $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist); 260 261 return preg_replace('/^['.$charlist.']+/u','',$str); 262 } 263} 264 265if(!function_exists('utf8_rtrim')){ 266 /** 267 * Unicode aware replacement for rtrim() 268 * 269 * @author Andreas Gohr <andi@splitbrain.org> 270 * @see rtrim() 271 * @param string $str 272 * @param string $charlist 273 * @return string 274 */ 275 function utf8_rtrim($str,$charlist=''){ 276 if($charlist == '') return rtrim($str); 277 278 //quote charlist for use in a characterclass 279 $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist); 280 281 return preg_replace('/['.$charlist.']+$/u','',$str); 282 } 283} 284 285if(!function_exists('utf8_trim')){ 286 /** 287 * Unicode aware replacement for trim() 288 * 289 * 
@author Andreas Gohr <andi@splitbrain.org> 290 * @see trim() 291 * @param string $str 292 * @param string $charlist 293 * @return string 294 */ 295 function utf8_trim($str,$charlist='') { 296 if($charlist == '') return trim($str); 297 298 return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist); 299 } 300} 301 302if(!function_exists('utf8_strtolower')){ 303 /** 304 * This is a unicode aware replacement for strtolower() 305 * 306 * Uses mb_string extension if available 307 * 308 * @author Leo Feyer <leo@typolight.org> 309 * @see strtolower() 310 * @see utf8_strtoupper() 311 */ 312 function utf8_strtolower($string){ 313 if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8'); 314 315 global $UTF8_UPPER_TO_LOWER; 316 return strtr($string,$UTF8_UPPER_TO_LOWER); 317 } 318} 319 320if(!function_exists('utf8_strtoupper')){ 321 /** 322 * This is a unicode aware replacement for strtoupper() 323 * 324 * Uses mb_string extension if available 325 * 326 * @author Leo Feyer <leo@typolight.org> 327 * @see strtoupper() 328 * @see utf8_strtoupper() 329 */ 330 function utf8_strtoupper($string){ 331 if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8'); 332 333 global $UTF8_LOWER_TO_UPPER; 334 return strtr($string,$UTF8_LOWER_TO_UPPER); 335 } 336} 337 338if(!function_exists('utf8_ucfirst')){ 339 /** 340 * UTF-8 aware alternative to ucfirst 341 * Make a string's first character uppercase 342 * 343 * @author Harry Fuecks 344 * @param string 345 * @return string with first character as upper case (if applicable) 346 */ 347 function utf8_ucfirst($str){ 348 switch ( utf8_strlen($str) ) { 349 case 0: 350 return ''; 351 case 1: 352 return utf8_strtoupper($str); 353 default: 354 preg_match('/^(.{1})(.*)$/us', $str, $matches); 355 return utf8_strtoupper($matches[1]).$matches[2]; 356 } 357 } 358} 359 360if(!function_exists('utf8_ucwords')){ 361 /** 362 * UTF-8 aware alternative to ucwords 363 * Uppercase the first character of each word in a string 364 * 365 * @author Harry Fuecks 366 * @param 
string 367 * @return string with first char of each word uppercase 368 * @see http://www.php.net/ucwords 369 */ 370 function utf8_ucwords($str) { 371 // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches; 372 // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns 373 // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords 374 $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u'; 375 376 return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str); 377 } 378 379 /** 380 * Callback function for preg_replace_callback call in utf8_ucwords 381 * You don't need to call this yourself 382 * 383 * @author Harry Fuecks 384 * @param array $matches matches corresponding to a single word 385 * @return string with first char of the word in uppercase 386 * @see utf8_ucwords 387 * @see utf8_strtoupper 388 */ 389 function utf8_ucwords_callback($matches) { 390 $leadingws = $matches[2]; 391 $ucfirst = utf8_strtoupper($matches[3]); 392 $ucword = utf8_substr_replace(ltrim($matches[0]),$ucfirst,0,1); 393 return $leadingws . $ucword; 394 } 395} 396 397if(!function_exists('utf8_deaccent')){ 398 /** 399 * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents 400 * 401 * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1) 402 * letters. 
Default is to deaccent both cases ($case = 0) 403 * 404 * @author Andreas Gohr <andi@splitbrain.org> 405 */ 406 function utf8_deaccent($string,$case=0){ 407 if($case <= 0){ 408 global $UTF8_LOWER_ACCENTS; 409 $string = strtr($string,$UTF8_LOWER_ACCENTS); 410 } 411 if($case >= 0){ 412 global $UTF8_UPPER_ACCENTS; 413 $string = strtr($string,$UTF8_UPPER_ACCENTS); 414 } 415 return $string; 416 } 417} 418 419if(!function_exists('utf8_romanize')){ 420 /** 421 * Romanize a non-latin string 422 * 423 * @author Andreas Gohr <andi@splitbrain.org> 424 */ 425 function utf8_romanize($string){ 426 if(utf8_isASCII($string)) return $string; //nothing to do 427 428 global $UTF8_ROMANIZATION; 429 return strtr($string,$UTF8_ROMANIZATION); 430 } 431} 432 433if(!function_exists('utf8_stripspecials')){ 434 /** 435 * Removes special characters (nonalphanumeric) from a UTF-8 string 436 * 437 * This function adds the controlchars 0x00 to 0x19 to the array of 438 * stripped chars (they are not included in $UTF8_SPECIAL_CHARS) 439 * 440 * @author Andreas Gohr <andi@splitbrain.org> 441 * @param string $string The UTF8 string to strip of special chars 442 * @param string $repl Replace special with this string 443 * @param string $additional Additional chars to strip (used in regexp char class) 444 * @return string 445 */ 446 function utf8_stripspecials($string,$repl='',$additional=''){ 447 global $UTF8_SPECIAL_CHARS2; 448 449 static $specials = null; 450 if(is_null($specials)){ 451 #$specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/'); 452 $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/'); 453 } 454 455 return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string); 456 } 457} 458 459if(!function_exists('utf8_strpos')){ 460 /** 461 * This is an Unicode aware replacement for strpos 462 * 463 * @author Leo Feyer <leo@typolight.org> 464 * @see strpos() 465 * @param string 466 * @param string 467 * @param integer 468 * @return integer 469 */ 470 function 
utf8_strpos($haystack, $needle, $offset=0){ 471 $comp = 0; 472 $length = null; 473 474 while (is_null($length) || $length < $offset) { 475 $pos = strpos($haystack, $needle, $offset + $comp); 476 477 if ($pos === false) 478 return false; 479 480 $length = utf8_strlen(substr($haystack, 0, $pos)); 481 482 if ($length < $offset) 483 $comp = $pos - $length; 484 } 485 486 return $length; 487 } 488} 489 490if(!function_exists('utf8_tohtml')){ 491 /** 492 * Encodes UTF-8 characters to HTML entities 493 * 494 * @author Tom N Harris <tnharris@whoopdedo.org> 495 * @author <vpribish at shopping dot com> 496 * @link http://www.php.net/manual/en/function.utf8-decode.php 497 */ 498 function utf8_tohtml ($str) { 499 $ret = ''; 500 foreach (utf8_to_unicode($str) as $cp) { 501 if ($cp < 0x80) 502 $ret .= chr($cp); 503 elseif ($cp < 0x100) 504 $ret .= "&#$cp;"; 505 else 506 $ret .= '&#x'.dechex($cp).';'; 507 } 508 return $ret; 509 } 510} 511 512if(!function_exists('utf8_unhtml')){ 513 /** 514 * Decodes HTML entities to UTF-8 characters 515 * 516 * Convert any &#..; entity to a codepoint, 517 * The entities flag defaults to only decoding numeric entities. 518 * Pass HTML_ENTITIES and named entities, including & < etc. 519 * are handled as well. Avoids the problem that would occur if you 520 * had to decode "&#38;&amp;#38;" 521 * 522 * unhtmlspecialchars(utf8_unhtml($s)) -> "&&" 523 * utf8_unhtml(unhtmlspecialchars($s)) -> "&&#38;" 524 * what it should be -> "&&#38;" 525 * 526 * @author Tom N Harris <tnharris@whoopdedo.org> 527 * @param string $str UTF-8 encoded string 528 * @param boolean $entities Flag controlling decoding of named entities. 529 * @return string UTF-8 encoded string with numeric (and named) entities replaced. 
530 */ 531 function utf8_unhtml($str, $entities=null) { 532 static $decoder = null; 533 if (is_null($decoder)) 534 $decoder = new utf8_entity_decoder(); 535 if (is_null($entities)) 536 return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m', 537 'utf8_decode_numeric', $str); 538 else 539 return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m', 540 array(&$decoder, 'decode'), $str); 541 } 542} 543 544if(!function_exists('utf8_decode_numeric')){ 545 /** 546 * Decodes numeric HTML entities to their correct UTF-8 characters 547 * 548 * @param $ent string A numeric entity 549 * @return string 550 */ 551 function utf8_decode_numeric($ent) { 552 switch ($ent[2]) { 553 case 'X': 554 case 'x': 555 $cp = hexdec($ent[3]); 556 break; 557 default: 558 $cp = intval($ent[3]); 559 break; 560 } 561 return unicode_to_utf8(array($cp)); 562 } 563} 564 565if(!class_exists('utf8_entity_decoder')){ 566 /** 567 * Encapsulate HTML entity decoding tables 568 */ 569 class utf8_entity_decoder { 570 var $table; 571 572 /** 573 * Initializes the decoding tables 574 */ 575 function __construct() { 576 $table = get_html_translation_table(HTML_ENTITIES); 577 $table = array_flip($table); 578 $this->table = array_map(array(&$this,'makeutf8'), $table); 579 } 580 581 /** 582 * Wrapper aorund unicode_to_utf8() 583 * 584 * @param $c string 585 * @return mixed 586 */ 587 function makeutf8($c) { 588 return unicode_to_utf8(array(ord($c))); 589 } 590 591 /** 592 * Decodes any HTML entity to it's correct UTF-8 char equivalent 593 * 594 * @param $ent string An entity 595 * @return string 596 */ 597 function decode($ent) { 598 if ($ent[1] == '#') { 599 return utf8_decode_numeric($ent); 600 } elseif (array_key_exists($ent[0],$this->table)) { 601 return $this->table[$ent[0]]; 602 } else { 603 return $ent[0]; 604 } 605 } 606 } 607} 608 609if(!function_exists('utf8_to_unicode')){ 610 /** 611 * Takes an UTF-8 string and returns an array of ints representing the 612 * Unicode characters. 
Astral planes are supported ie. the ints in the 613 * output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates 614 * are not allowed. 615 * 616 * If $strict is set to true the function returns false if the input 617 * string isn't a valid UTF-8 octet sequence and raises a PHP error at 618 * level E_USER_WARNING 619 * 620 * Note: this function has been modified slightly in this library to 621 * trigger errors on encountering bad bytes 622 * 623 * @author <hsivonen@iki.fi> 624 * @author Harry Fuecks <hfuecks@gmail.com> 625 * @param string $str UTF-8 encoded string 626 * @param boolean $strict Check for invalid sequences? 627 * @return mixed array of unicode code points or false if UTF-8 invalid 628 * @see unicode_to_utf8 629 * @link http://hsivonen.iki.fi/php-utf8/ 630 * @link http://sourceforge.net/projects/phputf8/ 631 */ 632 function utf8_to_unicode($str,$strict=false) { 633 $mState = 0; // cached expected number of octets after the current octet 634 // until the beginning of the next UTF8 character sequence 635 $mUcs4 = 0; // cached Unicode character 636 $mBytes = 1; // cached expected number of octets in the current sequence 637 638 $out = array(); 639 640 $len = strlen($str); 641 642 for($i = 0; $i < $len; $i++) { 643 644 $in = ord($str{$i}); 645 646 if ( $mState == 0) { 647 648 // When mState is zero we expect either a US-ASCII character or a 649 // multi-octet sequence. 650 if (0 == (0x80 & ($in))) { 651 // US-ASCII, pass straight through. 
652 $out[] = $in; 653 $mBytes = 1; 654 655 } else if (0xC0 == (0xE0 & ($in))) { 656 // First octet of 2 octet sequence 657 $mUcs4 = ($in); 658 $mUcs4 = ($mUcs4 & 0x1F) << 6; 659 $mState = 1; 660 $mBytes = 2; 661 662 } else if (0xE0 == (0xF0 & ($in))) { 663 // First octet of 3 octet sequence 664 $mUcs4 = ($in); 665 $mUcs4 = ($mUcs4 & 0x0F) << 12; 666 $mState = 2; 667 $mBytes = 3; 668 669 } else if (0xF0 == (0xF8 & ($in))) { 670 // First octet of 4 octet sequence 671 $mUcs4 = ($in); 672 $mUcs4 = ($mUcs4 & 0x07) << 18; 673 $mState = 3; 674 $mBytes = 4; 675 676 } else if (0xF8 == (0xFC & ($in))) { 677 /* First octet of 5 octet sequence. 678 * 679 * This is illegal because the encoded codepoint must be either 680 * (a) not the shortest form or 681 * (b) outside the Unicode range of 0-0x10FFFF. 682 * Rather than trying to resynchronize, we will carry on until the end 683 * of the sequence and let the later error handling code catch it. 684 */ 685 $mUcs4 = ($in); 686 $mUcs4 = ($mUcs4 & 0x03) << 24; 687 $mState = 4; 688 $mBytes = 5; 689 690 } else if (0xFC == (0xFE & ($in))) { 691 // First octet of 6 octet sequence, see comments for 5 octet sequence. 692 $mUcs4 = ($in); 693 $mUcs4 = ($mUcs4 & 1) << 30; 694 $mState = 5; 695 $mBytes = 6; 696 697 } elseif($strict) { 698 /* Current octet is neither in the US-ASCII range nor a legal first 699 * octet of a multi-octet sequence. 700 */ 701 trigger_error( 702 'utf8_to_unicode: Illegal sequence identifier '. 703 'in UTF-8 at byte '.$i, 704 E_USER_WARNING 705 ); 706 return false; 707 708 } 709 710 } else { 711 712 // When mState is non-zero, we expect a continuation of the multi-octet 713 // sequence 714 if (0x80 == (0xC0 & ($in))) { 715 716 // Legal continuation. 717 $shift = ($mState - 1) * 6; 718 $tmp = $in; 719 $tmp = ($tmp & 0x0000003F) << $shift; 720 $mUcs4 |= $tmp; 721 722 /** 723 * End of the multi-octet sequence. 
mUcs4 now contains the final 724 * Unicode codepoint to be output 725 */ 726 if (0 == --$mState) { 727 728 /* 729 * Check for illegal sequences and codepoints. 730 */ 731 // From Unicode 3.1, non-shortest form is illegal 732 if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || 733 ((3 == $mBytes) && ($mUcs4 < 0x0800)) || 734 ((4 == $mBytes) && ($mUcs4 < 0x10000)) || 735 (4 < $mBytes) || 736 // From Unicode 3.2, surrogate characters are illegal 737 (($mUcs4 & 0xFFFFF800) == 0xD800) || 738 // Codepoints outside the Unicode range are illegal 739 ($mUcs4 > 0x10FFFF)) { 740 741 if($strict){ 742 trigger_error( 743 'utf8_to_unicode: Illegal sequence or codepoint '. 744 'in UTF-8 at byte '.$i, 745 E_USER_WARNING 746 ); 747 748 return false; 749 } 750 751 } 752 753 if (0xFEFF != $mUcs4) { 754 // BOM is legal but we don't want to output it 755 $out[] = $mUcs4; 756 } 757 758 //initialize UTF8 cache 759 $mState = 0; 760 $mUcs4 = 0; 761 $mBytes = 1; 762 } 763 764 } elseif($strict) { 765 /** 766 *((0xC0 & (*in) != 0x80) && (mState != 0)) 767 * Incomplete multi-octet sequence. 768 */ 769 trigger_error( 770 'utf8_to_unicode: Incomplete multi-octet '. 771 ' sequence in UTF-8 at byte '.$i, 772 E_USER_WARNING 773 ); 774 775 return false; 776 } 777 } 778 } 779 return $out; 780 } 781} 782 783if(!function_exists('unicode_to_utf8')){ 784 /** 785 * Takes an array of ints representing the Unicode characters and returns 786 * a UTF-8 string. Astral planes are supported ie. the ints in the 787 * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates 788 * are not allowed. 
789 * 790 * If $strict is set to true the function returns false if the input 791 * array contains ints that represent surrogates or are outside the 792 * Unicode range and raises a PHP error at level E_USER_WARNING 793 * 794 * Note: this function has been modified slightly in this library to use 795 * output buffering to concatenate the UTF-8 string (faster) as well as 796 * reference the array by it's keys 797 * 798 * @param array $arr of unicode code points representing a string 799 * @param boolean $strict Check for invalid sequences? 800 * @return mixed UTF-8 string or false if array contains invalid code points 801 * @author <hsivonen@iki.fi> 802 * @author Harry Fuecks <hfuecks@gmail.com> 803 * @see utf8_to_unicode 804 * @link http://hsivonen.iki.fi/php-utf8/ 805 * @link http://sourceforge.net/projects/phputf8/ 806 */ 807 function unicode_to_utf8($arr,$strict=false) { 808 if (!is_array($arr)) return ''; 809 ob_start(); 810 811 foreach (array_keys($arr) as $k) { 812 813 if ( ($arr[$k] >= 0) && ($arr[$k] <= 0x007f) ) { 814 # ASCII range (including control chars) 815 816 echo chr($arr[$k]); 817 818 } else if ($arr[$k] <= 0x07ff) { 819 # 2 byte sequence 820 821 echo chr(0xc0 | ($arr[$k] >> 6)); 822 echo chr(0x80 | ($arr[$k] & 0x003f)); 823 824 } else if($arr[$k] == 0xFEFF) { 825 # Byte order mark (skip) 826 827 // nop -- zap the BOM 828 829 } else if ($arr[$k] >= 0xD800 && $arr[$k] <= 0xDFFF) { 830 # Test for illegal surrogates 831 832 // found a surrogate 833 if($strict){ 834 trigger_error( 835 'unicode_to_utf8: Illegal surrogate '. 
836 'at index: '.$k.', value: '.$arr[$k], 837 E_USER_WARNING 838 ); 839 return false; 840 } 841 842 } else if ($arr[$k] <= 0xffff) { 843 # 3 byte sequence 844 845 echo chr(0xe0 | ($arr[$k] >> 12)); 846 echo chr(0x80 | (($arr[$k] >> 6) & 0x003f)); 847 echo chr(0x80 | ($arr[$k] & 0x003f)); 848 849 } else if ($arr[$k] <= 0x10ffff) { 850 # 4 byte sequence 851 852 echo chr(0xf0 | ($arr[$k] >> 18)); 853 echo chr(0x80 | (($arr[$k] >> 12) & 0x3f)); 854 echo chr(0x80 | (($arr[$k] >> 6) & 0x3f)); 855 echo chr(0x80 | ($arr[$k] & 0x3f)); 856 857 } elseif($strict) { 858 859 trigger_error( 860 'unicode_to_utf8: Codepoint out of Unicode range '. 861 'at index: '.$k.', value: '.$arr[$k], 862 E_USER_WARNING 863 ); 864 865 // out of range 866 return false; 867 } 868 } 869 870 $result = ob_get_contents(); 871 ob_end_clean(); 872 return $result; 873 } 874} 875 876if(!function_exists('utf8_to_utf16be')){ 877 /** 878 * UTF-8 to UTF-16BE conversion. 879 * 880 * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits 881 */ 882 function utf8_to_utf16be(&$str, $bom = false) { 883 $out = $bom ? "\xFE\xFF" : ''; 884 if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8'); 885 886 $uni = utf8_to_unicode($str); 887 foreach($uni as $cp){ 888 $out .= pack('n',$cp); 889 } 890 return $out; 891 } 892} 893 894if(!function_exists('utf16be_to_utf8')){ 895 /** 896 * UTF-8 to UTF-16BE conversion. 
897 * 898 * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits 899 */ 900 function utf16be_to_utf8(&$str) { 901 $uni = unpack('n*',$str); 902 return unicode_to_utf8($uni); 903 } 904} 905 906if(!function_exists('utf8_bad_replace')){ 907 /** 908 * Replace bad bytes with an alternative character 909 * 910 * ASCII character is recommended for replacement char 911 * 912 * PCRE Pattern to locate bad bytes in a UTF-8 string 913 * Comes from W3 FAQ: Multilingual Forms 914 * Note: modified to include full ASCII range including control chars 915 * 916 * @author Harry Fuecks <hfuecks@gmail.com> 917 * @see http://www.w3.org/International/questions/qa-forms-utf-8 918 * @param string $str to search 919 * @param string $replace to replace bad bytes with (defaults to '?') - use ASCII 920 * @return string 921 */ 922 function utf8_bad_replace($str, $replace = '') { 923 $UTF8_BAD = 924 '([\x00-\x7F]'. # ASCII (including control chars) 925 '|[\xC2-\xDF][\x80-\xBF]'. # non-overlong 2-byte 926 '|\xE0[\xA0-\xBF][\x80-\xBF]'. # excluding overlongs 927 '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # straight 3-byte 928 '|\xED[\x80-\x9F][\x80-\xBF]'. # excluding surrogates 929 '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. # planes 1-3 930 '|[\xF1-\xF3][\x80-\xBF]{3}'. # planes 4-15 931 '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. 
# plane 16 932 '|(.{1}))'; # invalid byte 933 ob_start(); 934 while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches)) { 935 if ( !isset($matches[2])) { 936 echo $matches[0]; 937 } else { 938 echo $replace; 939 } 940 $str = substr($str,strlen($matches[0])); 941 } 942 $result = ob_get_contents(); 943 ob_end_clean(); 944 return $result; 945 } 946} 947 948if(!function_exists('utf8_correctIdx')){ 949 /** 950 * adjust a byte index into a utf8 string to a utf8 character boundary 951 * 952 * @param $str string utf8 character string 953 * @param $i int byte index into $str 954 * @param $next bool direction to search for boundary, 955 * false = up (current character) 956 * true = down (next character) 957 * 958 * @return int byte index into $str now pointing to a utf8 character boundary 959 * 960 * @author chris smith <chris@jalakai.co.uk> 961 */ 962 function utf8_correctIdx(&$str,$i,$next=false) { 963 964 if ($i <= 0) return 0; 965 966 $limit = strlen($str); 967 if ($i>=$limit) return $limit; 968 969 if ($next) { 970 while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++; 971 } else { 972 while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--; 973 } 974 975 return $i; 976 } 977} 978 979// only needed if no mb_string available 980if(!UTF8_MBSTRING){ 981 /** 982 * UTF-8 Case lookup table 983 * 984 * This lookuptable defines the upper case letters to their correspponding 985 * lower case letter in UTF-8 986 * 987 * @author Andreas Gohr <andi@splitbrain.org> 988 */ 989 global $UTF8_LOWER_TO_UPPER; 990 if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array( 991 "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q", 992 "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G", 993 "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ", 994 "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ", 995 
"ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 996 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 997 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 998 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 999 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 1000 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 1001 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 1002 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 1003 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 1004 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 1005 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 1006 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 1007 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 1008 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 1009 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 1010 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 1011 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 1012 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 1013 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 1014 "ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 1015 
"ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 1016 "ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 1017 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 1018 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 1019 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 1020 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 1021 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 1022 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1023 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1024 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1025 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1026 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1027 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1028 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1029 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1030 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1031 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1032 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1033 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1034 "ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1035 
"ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1036 "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1037 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1038 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1039 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1040 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1041 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1042 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1043 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1044 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1045 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1046 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1047 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1048 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1049 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1050 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1051 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1052 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1053 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1054 "ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1055 
"á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 1056 "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 1057 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 1058 ); 1059 1060 /** 1061 * UTF-8 Case lookup table 1062 * 1063 * This lookuptable defines the lower case letters to their corresponding 1064 * upper case letter in UTF-8 1065 * 1066 * @author Andreas Gohr <andi@splitbrain.org> 1067 */ 1068 global $UTF8_UPPER_TO_LOWER; 1069 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1070 "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", 1071 "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", 1072 "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", 1073 "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", 1074 "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", 1075 "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", 1076 "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", 1077 "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", 1078 "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", 1079 "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", 1080 "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", 1081 "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ", 1082 "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ", 1083 "Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố", 1084 
"Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ", 1085 "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ", 1086 "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ", 1087 "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ", 1088 "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ", 1089 "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ", 1090 "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ", 1091 "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ", 1092 "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ", 1093 "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ", 1094 "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս", 1095 "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ", 1096 "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ", 1097 "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ", 1098 "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ", 1099 "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ", 1100 "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ", 1101 "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ", 1102 "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ", 1103 "Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ", 1104 
"Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ", 1105 "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ", 1106 "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ", 1107 "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ", 1108 "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п", 1109 "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е", 1110 "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ", 1111 "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ", 1112 "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ", 1113 "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π", 1114 "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ", 1115 "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ", 1116 "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ", 1117 "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ", 1118 "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș", 1119 "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ", 1120 "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ", 1121 "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ", 1122 "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ", 1123 "Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ", 1124 
"Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž", 1125 "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ", 1126 "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ", 1127 "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ", 1128 "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī", 1129 "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė", 1130 "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă", 1131 "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö", 1132 "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì", 1133 "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â", 1134 "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1135 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1136 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1137 ); 1138}; // end of case lookup tables 1139 1140/** 1141 * UTF-8 lookup table for lower case accented letters 1142 * 1143 * This lookuptable defines replacements for accented characters from the ASCII-7 1144 * range. This are lower case letters only. 
1145 * 1146 * @author Andreas Gohr <andi@splitbrain.org> 1147 * @see utf8_deaccent() 1148 */ 1149global $UTF8_LOWER_ACCENTS; 1150if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array( 1151 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', 1152 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', 1153 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', 1154 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', 1155 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', 1156 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', 1157 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', 1158 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', 1159 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', 1160 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', 1161 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', 1162 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', 1163 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', 1164 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', 1165 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', 1166); 1167 1168/** 1169 * UTF-8 lookup table for upper case accented letters 1170 * 1171 * This lookuptable defines replacements for accented characters from the ASCII-7 1172 * range. This are upper case letters only. 
1173 * 1174 * @author Andreas Gohr <andi@splitbrain.org> 1175 * @see utf8_deaccent() 1176 */ 1177global $UTF8_UPPER_ACCENTS; 1178if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array( 1179 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', 1180 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', 1181 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', 1182 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', 1183 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', 1184 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', 1185 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', 1186 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', 1187 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', 1188 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', 1189 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', 1190 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', 1191 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', 1192 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', 1193 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', 1194); 1195 1196/** 1197 * UTF-8 array of common special characters 1198 * 1199 * This array should contain all special characters (not a letter or digit) 1200 * defined in the various local charsets - it's not a complete list of non-alphanum 1201 * characters in UTF-8. It's not perfect but should match most cases of special 1202 * chars. 1203 * 1204 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! 
1205 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a 1206 * 1207 * @author Andreas Gohr <andi@splitbrain.org> 1208 * @see utf8_stripspecials() 1209 */ 1210global $UTF8_SPECIAL_CHARS; 1211if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( 1212 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 1213 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 1214 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 1215 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 1216 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 1217 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 1218 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 1219 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 1220 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 1221 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 1222 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 1223 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 1224 0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 1225 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 1226 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1227 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1228 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1229 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1230 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1231 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1232 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 
0x2200, 0x2202, 1233 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1234 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1235 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1236 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1237 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1238 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1239 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1240 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1241 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1242 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1243 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1244 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1245 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1246 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1247 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1248 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1249 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1250 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1251 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1252 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1253 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1254 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1255 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1256 0x27b4, 0x27b5, 0x27b6, 
0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1257 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1258 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1259 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1260 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1261 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1262 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1263 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1264 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1265 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1266 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1267 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1268 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1269 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1270 0xffeb, 0xffec, 0xffed, 0xffee, 1271 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1272 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1273 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1274 0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1275 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1276); 1277 1278// utf8 version of above data 1279global $UTF8_SPECIAL_CHARS2; 1280if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1281 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ �'. 1282 '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. 1283 '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1284 '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. 1285 '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. 1286 '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. 1287 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. 1288 '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. 1289 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. 
1290 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1291 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1292 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1293 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1294 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1295 '➷➸➹➺➻➼➽➾'. 1296 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1297 '�'. 1298 '�ﹼﹽ'. 1299 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1300 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1301 ''. 1302 ' '; 1303 1304/** 1305 * Romanization lookup table 1306 * 1307 * This lookup tables provides a way to transform strings written in a language 1308 * different from the ones based upon latin letters into plain ASCII. 1309 * 1310 * Please note: this is not a scientific transliteration table. It only works 1311 * oneway from nonlatin to ASCII and it works by simple character replacement 1312 * only. Specialities of each language are not supported. 1313 * 1314 * @author Andreas Gohr <andi@splitbrain.org> 1315 * @author Vitaly Blokhin <vitinfo@vitn.com> 1316 * @link http://www.uconv.com/translit.htm 1317 * @author Bisqwit <bisqwit@iki.fi> 1318 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1319 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1320 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1321 * @link http://www.btranslations.com/resources/romanization/korean.asp 1322 * @author Arthit Suriyawongkul <arthit@gmail.com> 1323 * @author Denis Scheither <amorphis@uni-bremen.de> 1324 * @author Eivind Morland <eivind.morland@gmail.com> 1325 */ 1326global $UTF8_ROMANIZATION; 1327if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1328 // scandinavian - differs from what we do in deaccent 1329 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1330 1331 //russian cyrillic 1332 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1333 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1334 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1335 
'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1336 'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1337 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1338 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1339 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1340 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1341 // Ukrainian cyrillic 1342 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1343 // Georgian 1344 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1345 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1346 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1347 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1348 'ჰ'=>'xh', 1349 //Sanskrit 1350 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1351 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1352 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1353 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1354 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1355 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1356 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1357 //Sanskrit diacritics 1358 'Ā'=>'A','Ī'=>'I','Ū'=>'U','Ṛ'=>'R','Ṝ'=>'R','Ṅ'=>'N','Ñ'=>'N','Ṭ'=>'T', 1359 'Ḍ'=>'D','Ṇ'=>'N','Ś'=>'S','Ṣ'=>'S','Ṁ'=>'M','Ṃ'=>'M','Ḥ'=>'H','Ḷ'=>'L','Ḹ'=>'L', 1360 'ā'=>'a','ī'=>'i','ū'=>'u','ṛ'=>'r','ṝ'=>'r','ṅ'=>'n','ñ'=>'n','ṭ'=>'t', 1361 'ḍ'=>'d','ṇ'=>'n','ś'=>'s','ṣ'=>'s','ṁ'=>'m','ṃ'=>'m','ḥ'=>'h','ḷ'=>'l','ḹ'=>'l', 1362 //Hebrew 1363 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1364 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1365 
'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1366 'ש'=>'sh','ת'=>'t', 1367 //Arabic 1368 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1369 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1370 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1371 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1372 1373 // Japanese characters (last update: 2008-05-09) 1374 1375 // Japanese hiragana 1376 1377 // 3 character syllables, っ doubles the consonant after 1378 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1379 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1380 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1381 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1382 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1383 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1384 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1385 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1386 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1387 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1388 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1389 1390 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1391 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1392 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1393 1394 // 2 character syllables - normal 1395 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1396 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1397 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1398 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1399 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1400 
'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1401 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1402 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1403 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1404 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1405 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1406 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1407 'うぇ'=>'we','うぃ'=>'wi', 1408 'いぇ'=>'ye', 1409 1410 // 2 character syllables, っ doubles the consonant after 1411 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1412 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1413 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1414 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1415 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1416 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1417 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1418 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1419 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1420 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1421 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1422 1423 // 1 character syllabels 1424 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1425 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1426 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1427 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1428 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1429 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1430 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1431 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1432 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1433 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1434 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1435 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1436 
'わ'=>'wa','を'=>'wo', 1437 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1438 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1439 // old characters 1440 'ゑ'=>'we','ゐ'=>'wi', 1441 1442 // convert what's left (probably only kicks in when something's missing above) 1443 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1444 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1445 1446 // never seen one of those (disabled for the moment) 1447 // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1448 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1449 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1450 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1451 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1452 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1453 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1454 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1455 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1456 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1457 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1458 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1459 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1460 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1461 1462 // 'spare' characters from other romanization systems 1463 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1464 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1465 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1466 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1467 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1468 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1469 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1470 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1471 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1472 
//'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1473 1474 1475 // Japanese katakana 1476 1477 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1478 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1479 'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1480 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1481 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1482 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1483 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1484 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1485 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1486 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1487 'ッティー'=>'ttii', 1488 'ッヂィー'=>'ddii', 1489 1490 // 3 character syllables - doubled vowels 1491 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1492 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1493 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1494 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1495 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1496 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1497 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1498 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1499 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1500 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1501 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1502 
'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1503 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1504 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1505 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1506 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1507 'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1508 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1509 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1510 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1511 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1512 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1513 'ウェー'=>'wee','ウィー'=>'wii', 1514 'イェー'=>'yee', 1515 'ティー'=>'tii', 1516 'ヂィー'=>'dii', 1517 1518 // 3 character syllables - doubled consonants 1519 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1520 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1521 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1522 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1523 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1524 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1525 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1526 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1527 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1528 'ッティ'=>'tti', 1529 'ッヂィ'=>'ddi', 1530 1531 // 3 character syllables - doubled vowel and consonants 1532 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1533 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1534 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1535 
'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1536 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1537 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1538 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1539 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1540 'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1541 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1542 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1543 1544 // 2 character syllables - normal 1545 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1546 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1547 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1548 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1549 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1550 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1551 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1552 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1553 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1554 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1555 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1556 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1557 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1558 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1559 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1560 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1561 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1562 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1563 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1564 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1565 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1566 
'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1567 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1568 'ウェ'=>'we','ウィ'=>'wi', 1569 'イェ'=>'ye', 1570 'ティ'=>'ti', 1571 'ヂィ'=>'di', 1572 1573 // 2 character syllables - doubled vocal 1574 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1575 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 1576 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1577 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1578 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1579 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1580 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1581 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1582 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1583 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1584 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1585 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1586 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1587 'ワー'=>'waa','ヲー'=>'woo', 1588 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1589 'ヵー'=>'kaa','ヶー'=>'kee', 1590 // old characters 1591 'ヱー'=>'wee','ヰー'=>'wii', 1592 1593 // seperate katakana 'n' 1594 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1595 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1596 1597 // 2 character syllables - doubled consonants 1598 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1599 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1600 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1601 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1602 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1603 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1604 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1605 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1606 
'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1607 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1608 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1609 1610 // 1 character syllables 1611 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1612 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1613 'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1614 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1615 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1616 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1617 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1618 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1619 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1620 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1621 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1622 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1623 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1624 'ワ'=>'wa','ヲ'=>'wo', 1625 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1626 'ヵ'=>'ka','ヶ'=>'ke', 1627 // old characters 1628 'ヱ'=>'we','ヰ'=>'wi', 1629 1630 // convert what's left (probably only kicks in when something's missing above) 1631 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1632 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1633 1634 // special characters 1635 '・'=>'_','、'=>'_', 1636 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1637 1638 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1639 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1640 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1641 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1642 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1643 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1644 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1645 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1646 // 
'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1647 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1648 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1649 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1650 1651 // "Greeklish" 1652 'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1653 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1654 1655 // Thai 1656 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1657 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1658 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1659 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1660 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1661 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1662 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1663 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1664 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1665 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1666 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1667 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1668 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1669 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1670 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1671 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1672 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1673 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1674 1675 // Korean 1676 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1677 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1678 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1679 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1680 
'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1681 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1682); 1683 1684 1685