<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * check for mb_string support
 *
 * Defining UTF8_NOMBSTRING before this file is loaded forces the non-mbstring
 * code paths even when the extension is available.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

/**
 * Check if PREG was compiled with UTF-8 support
 *
 * Without this many of the functions below will not work, so this is a minimal requirement
 */
if(!defined('UTF8_PREGSUPPORT')){
    define('UTF8_PREGSUPPORT', (bool) @preg_match('/^.$/u', 'ñ'));
}

/**
 * Check if PREG was compiled with Unicode Property support
 *
 * This is not required for the functions below, but might be needed in a UTF-8 aware application
 */
if(!defined('UTF8_PROPERTYSUPPORT')){
    define('UTF8_PROPERTYSUPPORT', (bool) @preg_match('/^\pL$/u', 'ñ'));
}


if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
     *
     * @param string $str
     * @return bool true when no byte above 0x7F is found
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string: every byte >= 0x80 is removed, all other
     * bytes are kept in their original order.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $str
     * @return string
     */
    function utf8_strip($str){
        // The former implementation indexed the string with $str{$i}, a syntax
        // that was deprecated in PHP 7.4 and is a parse error in PHP 8.
        // A byte-wise (no /u flag) character class does the same job.
        return (string) preg_replace('/[\x80-\xFF]/', '', (string) $str);
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * This is a structural check only: every lead byte must be followed by the
     * number of continuation bytes (10bbbbbb) it announces. Lead bytes for 5 and
     * 6 byte sequences are accepted, and neither overlong forms nor surrogates
     * are rejected.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://php.net/manual/en/function.utf8-encode.php
     *
     * @param string $Str
     * @return bool
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue; # 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
            else return false; # Does not match any model

            for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_basename')){
    /**
     * A locale independent basename() implementation
     *
     * works around a bug in PHP's basename() implementation
     *
     * Both '/' and '\' are treated as directory separators; leading and
     * trailing separators are trimmed before the last component is extracted.
     *
     * @see basename()
     * @link   https://bugs.php.net/bug.php?id=37738
     *
     * @param string $path     A path
     * @param string $suffix   If the name component ends in suffix this will also be cut off
     * @return string
     */
    function utf8_basename($path, $suffix=''){
        $path = trim($path,'\\/');
        // strrpos() yields false when a separator is absent; max() then picks
        // the other position (or false when neither is present)
        $rpos = max(strrpos($path, '/'), strrpos($path, '\\'));
        if($rpos) $path = substr($path, $rpos+1);

        $suflen = strlen($suffix);
        // strict comparison: with == numeric looking strings such as '010' and
        // '1e1' would be considered equal and the name cut off wrongly
        if($suflen && (substr($path, -$suflen) === (string) $suffix)){
            $path = substr($path, 0, -$suflen);
        }

        return $path;
    }
}

if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * Uses mb_strlen() when mbstring is enabled, then iconv_strlen(). Only when
     * neither is available it falls back to utf8_decode(), which converts
     * characters that are not in ISO-8859-1 to '?', which, for the purpose
     * of counting, is alright. utf8_decode() is no longer the first choice
     * because it is deprecated as of PHP 8.2. If nothing else is available the
     * byte length is returned.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @see    utf8_decode()
     *
     * @param string $string
     * @return int
     */
    function utf8_strlen($string) {
        if (UTF8_MBSTRING) {
            return mb_strlen($string, 'UTF-8');
        }

        if (function_exists('iconv_strlen')) {
            return iconv_strlen($string, 'UTF-8');
        }

        if (function_exists('utf8_decode')) {
            return strlen(utf8_decode($string));
        }

        return strlen($string);
    }
}

if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * With mbstring the call is delegated to mb_substr(); otherwise the result
     * is extracted with a PCRE pattern built from the offset and length.
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     *
     * @param string $str
     * @param int $offset number of UTF-8 characters offset (from left)
     * @param int $length (optional) length in UTF-8 characters from offset
     * @return string the requested part, an empty string when nothing matches
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            if( $length === null ){
                return mb_substr($str, $offset);
            }else{
                return mb_substr($str, $offset, $length);
            }
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
        $offset = (int)$offset;
        if (!is_null($length)) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = utf8_strlen($str);        // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if (is_null($length)) {
            $length_pattern = '(.*)$';                  // the rest of the string
        } else {

            if (!isset($strlen)) $strlen = utf8_strlen($str);    // see notes
            if ($offset > $strlen) return '';           // another trivial case

            if ($length > 0) {

                $length = min($strlen-$offset, $length);  // reduce any length that would go past the end of the string

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {

                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}

if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * The result is assembled as: the first $start characters, the replacement,
     * then everything from character $start+$length onwards.
     *
     * NOTE(review): only non-negative $start and $length are handled in a
     * substr_replace() compatible way; negative values are simply passed on to
     * utf8_substr() - confirm no caller relies on negative arguments.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     *
     * @param string $string      input string
     * @param string $replacement the replacement
     * @param int    $start       the replacing will begin at the start'th offset into string.
     * @param int    $length      If given and is positive, it represents the length of the portion of string which is
     *                            to be replaced. If length is zero then this function will have the effect of inserting
     *                            replacement into string at the given start offset.
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        $ret = '';
        if($start>0) $ret .= utf8_substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= utf8_substr($string, $start+$length);
        return $ret;
    }
}

if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * Without a charlist the native ltrim() is used. With a charlist every
     * character in it is taken literally (no ".." ranges).
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     *
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if((string) $charlist === '') return ltrim($str);

        // quote charlist for use in a character class. preg_quote() also
        // escapes '^', which the former hand written escaping missed: a
        // charlist starting with '^' was turned into a negated class.
        $charlist = preg_quote($charlist, '/');

        return preg_replace('/^['.$charlist.']+/u','',$str);
    }
}

if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * Without a charlist the native rtrim() is used. With a charlist every
     * character in it is taken literally (no ".." ranges).
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     *
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if((string) $charlist === '') return rtrim($str);

        // quote charlist for use in a character class (see utf8_ltrim for why
        // preg_quote() is used here)
        $charlist = preg_quote($charlist, '/');

        return preg_replace('/['.$charlist.']+$/u','',$str);
    }
}

if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     *
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if((string) $charlist === '') return trim($str);

        return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
    }
}

if(!function_exists('utf8_strtolower')){
    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available; the result is additionally passed
     * through Normalizer::normalize() when the intl Normalizer class is loaded.
     * Without mbstring the global $UTF8_UPPER_TO_LOWER table is applied.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     *
     * @param string $string
     * @return string
     */
    function utf8_strtolower($string){
        if(UTF8_MBSTRING) {
            $lower = mb_strtolower($string,'utf-8');
            // second argument false: do not trigger autoloading for the check
            if (class_exists('Normalizer', false)) {
                return Normalizer::normalize($lower);
            }
            return $lower;
        }
        global $UTF8_UPPER_TO_LOWER;
        return strtr($string,$UTF8_UPPER_TO_LOWER);
    }
}

if(!function_exists('utf8_strtoupper')){
    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available, otherwise the global
     * $UTF8_LOWER_TO_UPPER table is applied.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtolower()
     *
     * @param string $string
     * @return string
     */
    function utf8_strtoupper($string){
        if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');

        global $UTF8_LOWER_TO_UPPER;
        return strtr($string,$UTF8_LOWER_TO_UPPER);
    }
}

if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     *
     * @param string $str
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        switch ( utf8_strlen($str) ) {
            case 0:
                return '';
            case 1:
                return utf8_strtoupper($str);
            default:
                // the /u pattern does not match invalid UTF-8; hand such
                // input back untouched instead of reading undefined matches
                if (!preg_match('/^(.{1})(.*)$/us', $str, $matches)) return $str;
                return utf8_strtoupper($matches[1]).$matches[2];
        }
    }
}

if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @see http://php.net/ucwords
     *
     * @param string $str
     * @return string with first char of each word uppercase
     */
    function utf8_ucwords($str) {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
    }

    /**
     * Callback function for preg_replace_callback call in utf8_ucwords
     * You don't need to call this yourself
     *
     * @author Harry Fuecks
     * @see utf8_ucwords
     * @see utf8_strtoupper
     *
     * @param  array $matches matches corresponding to a single word:
     *                        [0] whole match, [2] leading whitespace, [3] first character of the word
     * @return string with first char of the word in uppercase
     */
    function utf8_ucwords_callback($matches) {
        $leadingws = $matches[2];
        $first     = $matches[3];

        // Cut the remainder by byte length of the captured groups. The former
        // ltrim() based code used a different whitespace set than the pattern
        // (no form feed, but NUL) and corrupted words following a form feed.
        $rest = substr($matches[0], strlen($leadingws) + strlen($first));

        return $leadingws . utf8_strtoupper($first) . $rest;
    }
}

if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters. Default is to deaccent both cases ($case = 0)
     *
     * The replacements come from the global $UTF8_LOWER_ACCENTS and
     * $UTF8_UPPER_ACCENTS tables.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $string
     * @param int $case
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        if($case <= 0){
            global $UTF8_LOWER_ACCENTS;
            $string = strtr($string,$UTF8_LOWER_ACCENTS);
        }
        if($case >= 0){
            global $UTF8_UPPER_ACCENTS;
            $string = strtr($string,$UTF8_UPPER_ACCENTS);
        }
        return $string;
    }
}

if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * Pure ASCII input is returned unchanged, everything else is translated
     * with the global $UTF8_ROMANIZATION table.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $string
     * @return string
     */
    function utf8_romanize($string){
        if(utf8_isASCII($string)) return $string; //nothing to do

        global $UTF8_ROMANIZATION;
        return strtr($string,$UTF8_ROMANIZATION);
    }
}

if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the set of
     * stripped chars taken from the global $UTF8_SPECIAL_CHARS2 string.
     *
     * $additional is inserted into the character class as is, so the caller is
     * responsible for escaping it for use inside a regexp character class.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param  string $string     The UTF8 string to strip of special chars
     * @param  string $repl       Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS2;

        // quoted only once per request
        static $specials = null;
        if(is_null($specials)){
            #$specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/');
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
    }
}

if(!function_exists('utf8_strpos')){
    /**
     * This is an Unicode aware replacement for strpos
     *
     * The needle is searched with the byte based strpos(); the byte position
     * of a hit is converted into a character position. Hits located before the
     * requested character offset are skipped by searching again further right.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     *
     * @param  string  $haystack
     * @param  string  $needle
     * @param  integer $offset character offset to start searching from
     * @return integer|false character position of the first hit, false when not found
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        $comp = 0;      // bytes to add to $offset for the next byte based search
        $length = null; // character position of the current hit

        while (is_null($length) || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false)
                return false;

            $length = utf8_strlen(substr($haystack, 0, $pos));

            if ($length < $offset)
                $comp = $pos - $length;
        }

        return $length;
    }
}

if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * Code points below 0x100 are written as decimal entities, all others as
     * hexadecimal entities.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://php.net/manual/en/function.utf8-decode.php
     *
     * @param string $str
     * @param bool $all Encode ASCII characters (below 0x80) as well; by default they are passed through
     * @return string
     */
    function utf8_tohtml($str, $all = false) {
        $ret = '';
        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp < 0x80 && !$all)
                $ret .= chr($cp);
            elseif ($cp < 0x100)
                $ret .= "&#$cp;";
            else
                $ret .= '&#x'.dechex($cp).';';
        }
        return $ret;
    }
}

if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * Convert any &#..; entity to a codepoint,
     * The entities flag defaults to only decoding numeric entities.
     * Pass HTML_ENTITIES (any non-null value works) and named entities,
     * including &amp; &lt; etc. are handled as well.
     *
     * Numeric and named entities are decoded in one single pass. This avoids
     * double decoding of input like "&#38;&amp;#38;", which comes out as
     * "&&#38;" instead of "&&".
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     *
     * @param  string  $str      UTF-8 encoded string
     * @param  boolean $entities Flag controlling decoding of named entities.
     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
     */
    function utf8_unhtml($str, $entities=null) {
        if (is_null($entities)) {
            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
                                         'utf8_decode_numeric', $str);
        }

        // the translation table is only built when named entities are requested
        static $decoder = null;
        if (is_null($decoder)) {
            $decoder = new utf8_entity_decoder();
        }

        return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                                     array($decoder, 'decode'), $str);
    }
}

if(!function_exists('utf8_decode_numeric')){
    /**
     * Decodes numeric HTML entities to their correct UTF-8 characters
     *
     * Expects the match array of the patterns used in utf8_unhtml():
     * index 2 holds the optional 'x'/'X' hex marker, index 3 the digits.
     *
     * @param $ent string A numeric entity
     * @return string|false
     */
    function utf8_decode_numeric($ent) {
        switch ($ent[2]) {
            case 'X':
            case 'x':
                $cp = hexdec($ent[3]);
                break;
            default:
                $cp = intval($ent[3]);
                break;
        }
        return unicode_to_utf8(array($cp));
    }
}

if(!class_exists('utf8_entity_decoder')){
    /**
     * Encapsulate HTML entity decoding tables
     */
    class utf8_entity_decoder {
        /** @var array maps an entity ('&name;') to its UTF-8 encoded character */
        protected $table;

        /**
         * Initializes the decoding tables
         *
         * The translation table is requested in UTF-8 explicitly, so the
         * flipped table can be used as is. The former code ran ord() on every
         * character, which only works for single byte tables and produced
         * wrong characters since the default table encoding became UTF-8.
         */
        function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
            $this->table = array_flip($table);
        }

        /**
         * Wrapper around unicode_to_utf8()
         *
         * No longer used internally, kept for backward compatibility. Only
         * the first byte of $c is looked at.
         *
         * @param string $c
         * @return string|false
         */
        function makeutf8($c) {
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * Decodes any HTML entity to its correct UTF-8 char equivalent
         *
         * Unknown named entities are returned unchanged.
         *
         * @param string $ent An entity (match array from utf8_unhtml())
         * @return string|false
         */
        function decode($ent) {
            if ($ent[1] === '#') {
                return utf8_decode_numeric($ent);
            }
            if (array_key_exists($ent[0], $this->table)) {
                return $this->table[$ent[0]];
            }
            return $ent[0];
        }
    }
}

if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING
     *
     * Without $strict, bytes that cannot start or continue a sequence are
     * skipped and the code points of illegal sequences are still put into the
     * output.
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    unicode_to_utf8
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     *
     * @param  string  $str UTF-8 encoded string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0;     // cached expected number of octets after the current octet
                         // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0;     // cached Unicode character
        $mBytes = 1;     // cached expected number of octets in the current sequence

        $out = array();

        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {

            // square brackets: the $str{$i} syntax is gone in PHP 8
            $in = ord($str[$i]);

            if ( $mState == 0) {

                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in))) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & ($in))) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & ($in))) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & ($in))) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & ($in))) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & ($in))) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Illegal sequence identifier '.
                                'in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );
                    return false;

                }

            } else {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in))) {

                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState) {

                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                        'utf8_to_unicode: Illegal sequence or codepoint '.
                                            'in UTF-8 at byte '.$i,
                                        E_USER_WARNING
                                    );

                                return false;
                            }

                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Incomplete multi-octet '.
                            '   sequence in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );

                    return false;
                }
            }
        }
        return $out;
    }
}

if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING.
     * Without $strict such values are silently skipped.
     *
     * The string is built by plain concatenation. The former implementation
     * used output buffering and left its buffer open whenever it bailed out
     * in strict mode.
     *
     * @param  array $arr of unicode code points representing a string
     * @param  boolean $strict Check for invalid sequences?
     * @return string|false UTF-8 string or false if array contains invalid code points
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    utf8_to_unicode
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';

        $utf8 = '';

        foreach ($arr as $k => $cp) {

            if ($cp < 0 || $cp > 0x10ffff) {
                # out of range (negative values used to be encoded as garbage)

                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Codepoint out of Unicode range '.
                            'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                    );
                    return false;
                }

            } else if ($cp <= 0x007f) {
                # ASCII range (including control chars)

                $utf8 .= chr($cp);

            } else if ($cp <= 0x07ff) {
                # 2 byte sequence

                $utf8 .= chr(0xc0 | ($cp >> 6));
                $utf8 .= chr(0x80 | ($cp & 0x003f));

            } else if ($cp == 0xFEFF) {
                # Byte order mark (skip)

                // nop -- zap the BOM

            } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {
                # Test for illegal surrogates

                // found a surrogate
                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                            'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                    );
                    return false;
                }

            } else if ($cp <= 0xffff) {
                # 3 byte sequence

                $utf8 .= chr(0xe0 | ($cp >> 12));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x003f));
                $utf8 .= chr(0x80 | ($cp & 0x003f));

            } else {
                # 4 byte sequence

                $utf8 .= chr(0xf0 | ($cp >> 18));
                $utf8 .= chr(0x80 | (($cp >> 12) & 0x3f));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x3f));
                $utf8 .= chr(0x80 | ($cp & 0x3f));
            }
        }

        return $utf8;
    }
}

if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_convert_encoding() when mbstring is available. The fallback
     * encodes code points above 0xFFFF as surrogate pairs.
     *
     * @param string $str
     * @param bool $bom prepend the big endian byte order mark?
     * @return string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-8' === 'UTF-16BE' ? 'UTF-8' : 'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if ($cp > 0xFFFF) {
                // split into high and low surrogate, pack('n') alone would
                // silently truncate the value to 16 bits
                $cp -= 0x10000;
                $out .= pack('n2', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            } else {
                $out .= pack('n',$cp);
            }
        }
        return $out;
    }
}

if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * Surrogate pairs are combined into a single code point. Unpaired
     * surrogates are handed to unicode_to_utf8() unchanged, which skips them.
     *
     * @param string $str
     * @return false|string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if (!is_array($units)) return '';

        $uni  = array();
        $high = null; // pending high surrogate waiting for its partner

        foreach ($units as $unit) {
            if ($high !== null) {
                if ($unit >= 0xDC00 && $unit <= 0xDFFF) {
                    $uni[] = 0x10000 + (($high - 0xD800) << 10) + ($unit - 0xDC00);
                    $high = null;
                    continue;
                }
                // no low surrogate followed: pass the lone one on
                $uni[] = $high;
                $high = null;
            }

            if ($unit >= 0xD800 && $unit <= 0xDBFF) {
                $high = $unit;
            } else {
                $uni[] = $unit;
            }
        }
        if ($high !== null) $uni[] = $high;

        return unicode_to_utf8($uni);
    }
}

if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see http://www.w3.org/International/questions/qa-forms-utf-8
     *
     * @param string $str to search
     * @param string $replace to replace bad bytes with (defaults to '', i.e. bad bytes are removed) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
            '([\x00-\x7F]'.                          # ASCII (including control chars)
            '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
            '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
            '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
            '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
            '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
            '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
            '|(.{1}))';                              # invalid byte

        // Every byte is consumed by one of the alternatives, so the callback
        // sees the whole string piece by piece. Group 2 is only set when the
        // last alternative (a single invalid byte) matched.
        $result = preg_replace_callback(
            '/'.$UTF8_BAD.'/S',
            function ($matches) use ($replace) {
                return isset($matches[2]) ? $replace : $matches[0];
            },
            (string) $str
        );

        return (string) $result;
    }
}

if(!function_exists('utf8_correctIdx')){
    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * An index <= 0 yields 0, an index at or beyond the end yields the byte
     * length of the string. Otherwise the index is moved as long as it points
     * at a continuation byte (10bbbbbb).
     *
     * @param string $str   utf8 character string
     * @param int    $i     byte index into $str
     * @param $next  bool     direction to search for boundary,
     *                           false = up (current character)
     *                           true = down (next character)
     *
     * @return int            byte index into $str now pointing to a utf8 character boundary
     *
     * @author       chris smith <chris@jalakai.co.uk>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $limit = strlen($str);
        if ($i>=$limit) return $limit;

        if ($next) {
            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
        }

        return $i;
    }
}

// only needed if no mb_string available
if(!UTF8_MBSTRING){
    /**
     * UTF-8 Case lookup table
     *
     * This lookup table maps lower case letters to their corresponding
     * upper case letter in UTF-8
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_LOWER_TO_UPPER;
    if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
            "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q",
            "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G",
            "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ",
"ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ", 1083 "ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 1084 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 1085 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 1086 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 1087 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 1088 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 1089 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 1090 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 1091 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 1092 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 1093 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 1094 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 1095 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 1096 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 1097 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 1098 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 1099 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 1100 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 1101 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 1102 
"ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 1103 "ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 1104 "ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 1105 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 1106 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 1107 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 1108 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 1109 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 1110 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1111 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1112 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1113 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1114 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1115 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1116 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1117 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1118 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1119 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1120 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1121 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1122 
"ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1123 "ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1124 "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1125 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1126 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1127 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1128 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1129 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1130 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1131 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1132 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1133 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1134 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1135 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1136 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1137 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1138 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1139 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1140 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1141 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1142 
"ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1143 "á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 1144 "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 1145 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 1146 ); 1147 1148 /** 1149 * UTF-8 Case lookup table 1150 * 1151 * This lookuptable defines the lower case letters to their corresponding 1152 * upper case letter in UTF-8 1153 * 1154 * @author Andreas Gohr <andi@splitbrain.org> 1155 */ 1156 global $UTF8_UPPER_TO_LOWER; 1157 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1158 "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", 1159 "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", 1160 "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", 1161 "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", 1162 "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", 1163 "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", 1164 "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", 1165 "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", 1166 "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", 1167 "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", 1168 "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", 1169 "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ", 1170 "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ", 1171 
"Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố", 1172 "Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ", 1173 "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ", 1174 "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ", 1175 "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ", 1176 "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ", 1177 "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ", 1178 "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ", 1179 "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ", 1180 "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ", 1181 "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ", 1182 "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս", 1183 "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ", 1184 "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ", 1185 "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ", 1186 "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ", 1187 "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ", 1188 "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ", 1189 "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ", 1190 "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ", 1191 
"Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ", 1192 "Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ", 1193 "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ", 1194 "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ", 1195 "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ", 1196 "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п", 1197 "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е", 1198 "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ", 1199 "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ", 1200 "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ", 1201 "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π", 1202 "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ", 1203 "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ", 1204 "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ", 1205 "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ", 1206 "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș", 1207 "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ", 1208 "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ", 1209 "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ", 1210 "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ", 1211 
"Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ", 1212 "Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž", 1213 "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ", 1214 "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ", 1215 "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ", 1216 "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī", 1217 "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė", 1218 "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă", 1219 "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö", 1220 "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì", 1221 "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â", 1222 "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1223 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1224 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1225 ); 1226}; // end of case lookup tables 1227 1228/** 1229 * UTF-8 lookup table for lower case accented letters 1230 * 1231 * This lookuptable defines replacements for accented characters from the ASCII-7 1232 * range. This are lower case letters only. 
1233 * 1234 * @author Andreas Gohr <andi@splitbrain.org> 1235 * @see utf8_deaccent() 1236 */ 1237global $UTF8_LOWER_ACCENTS; 1238if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array( 1239 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', 1240 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', 1241 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', 1242 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', 1243 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', 1244 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', 1245 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', 1246 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', 1247 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', 1248 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', 1249 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', 1250 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', 1251 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', 1252 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', 1253 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', 1254); 1255 1256/** 1257 * UTF-8 lookup table for upper case accented letters 1258 * 1259 * This lookuptable defines replacements for accented characters from the ASCII-7 1260 * range. This are upper case letters only. 
1261 * 1262 * @author Andreas Gohr <andi@splitbrain.org> 1263 * @see utf8_deaccent() 1264 */ 1265global $UTF8_UPPER_ACCENTS; 1266if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array( 1267 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', 1268 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', 1269 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', 1270 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', 1271 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', 1272 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', 1273 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', 1274 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', 1275 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', 1276 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', 1277 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', 1278 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', 1279 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', 1280 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', 1281 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', 1282); 1283 1284/** 1285 * UTF-8 array of common special characters 1286 * 1287 * This array should contain all special characters (not a letter or digit) 1288 * defined in the various local charsets - it's not a complete list of non-alphanum 1289 * characters in UTF-8. It's not perfect but should match most cases of special 1290 * chars. 1291 * 1292 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! 
1293 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a 1294 * 1295 * @author Andreas Gohr <andi@splitbrain.org> 1296 * @see utf8_stripspecials() 1297 */ 1298global $UTF8_SPECIAL_CHARS; 1299if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( 1300 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 1301 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 1302 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 1303 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 1304 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 1305 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 1306 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 1307 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 1308 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 1309 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 1310 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 1311 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 1312 0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 1313 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 1314 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1315 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1316 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1317 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1318 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1319 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1320 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 
0x2200, 0x2202, 1321 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1322 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1323 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1324 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1325 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1326 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1327 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1328 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1329 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1330 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1331 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1332 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1333 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1334 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1335 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1336 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1337 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1338 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1339 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1340 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1341 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1342 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1343 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1344 0x27b4, 0x27b5, 0x27b6, 
0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1345 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1346 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1347 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1348 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1349 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1350 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1351 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1352 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1353 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1354 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1355 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1356 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1357 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1358 0xffeb, 0xffec, 0xffed, 0xffee, 1359 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1360 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1361 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1362 0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1363 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1364); 1365 1366// utf8 version of above data 1367global $UTF8_SPECIAL_CHARS2; 1368if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1369 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ �'. 1370 '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. 1371 '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1372 '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. 1373 '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. 1374 '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. 1375 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. 1376 '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. 1377 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. 
1378 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1379 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1380 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1381 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1382 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1383 '➷➸➹➺➻➼➽➾'. 1384 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1385 '�'. 1386 '�ﹼﹽ'. 1387 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1388 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1389 ''. 1390 ' '; 1391 1392/** 1393 * Romanization lookup table 1394 * 1395 * This lookup tables provides a way to transform strings written in a language 1396 * different from the ones based upon latin letters into plain ASCII. 1397 * 1398 * Please note: this is not a scientific transliteration table. It only works 1399 * oneway from nonlatin to ASCII and it works by simple character replacement 1400 * only. Specialities of each language are not supported. 1401 * 1402 * @author Andreas Gohr <andi@splitbrain.org> 1403 * @author Vitaly Blokhin <vitinfo@vitn.com> 1404 * @link http://www.uconv.com/translit.htm 1405 * @author Bisqwit <bisqwit@iki.fi> 1406 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1407 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1408 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1409 * @link http://www.btranslations.com/resources/romanization/korean.asp 1410 * @author Arthit Suriyawongkul <arthit@gmail.com> 1411 * @author Denis Scheither <amorphis@uni-bremen.de> 1412 * @author Eivind Morland <eivind.morland@gmail.com> 1413 */ 1414global $UTF8_ROMANIZATION; 1415if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1416 // scandinavian - differs from what we do in deaccent 1417 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1418 1419 //russian cyrillic 1420 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1421 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1422 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1423 
'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1424 'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1425 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1426 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1427 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1428 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1429 // Ukrainian cyrillic 1430 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1431 // Georgian 1432 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1433 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1434 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1435 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1436 'ჰ'=>'xh', 1437 //Sanskrit 1438 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1439 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1440 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1441 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1442 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1443 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1444 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1445 //Sanskrit diacritics 1446 'Ā'=>'A','Ī'=>'I','Ū'=>'U','Ṛ'=>'R','Ṝ'=>'R','Ṅ'=>'N','Ñ'=>'N','Ṭ'=>'T', 1447 'Ḍ'=>'D','Ṇ'=>'N','Ś'=>'S','Ṣ'=>'S','Ṁ'=>'M','Ṃ'=>'M','Ḥ'=>'H','Ḷ'=>'L','Ḹ'=>'L', 1448 'ā'=>'a','ī'=>'i','ū'=>'u','ṛ'=>'r','ṝ'=>'r','ṅ'=>'n','ñ'=>'n','ṭ'=>'t', 1449 'ḍ'=>'d','ṇ'=>'n','ś'=>'s','ṣ'=>'s','ṁ'=>'m','ṃ'=>'m','ḥ'=>'h','ḷ'=>'l','ḹ'=>'l', 1450 //Hebrew 1451 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1452 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1453 
'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1454 'ש'=>'sh','ת'=>'t', 1455 //Arabic 1456 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1457 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1458 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1459 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1460 1461 // Japanese characters (last update: 2008-05-09) 1462 1463 // Japanese hiragana 1464 1465 // 3 character syllables, っ doubles the consonant after 1466 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1467 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1468 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1469 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1470 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1471 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1472 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1473 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1474 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1475 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1476 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1477 1478 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1479 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1480 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1481 1482 // 2 character syllables - normal 1483 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1484 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1485 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1486 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1487 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1488 
'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1489 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1490 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1491 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1492 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1493 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1494 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1495 'うぇ'=>'we','うぃ'=>'wi', 1496 'いぇ'=>'ye', 1497 1498 // 2 character syllables, っ doubles the consonant after 1499 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1500 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1501 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1502 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1503 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1504 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1505 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1506 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1507 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1508 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1509 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1510 1511 // 1 character syllabels 1512 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1513 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1514 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1515 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1516 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1517 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1518 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1519 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1520 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1521 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1522 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1523 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1524 
'わ'=>'wa','を'=>'wo', 1525 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1526 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1527 // old characters 1528 'ゑ'=>'we','ゐ'=>'wi', 1529 1530 // convert what's left (probably only kicks in when something's missing above) 1531 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1532 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1533 1534 // never seen one of those (disabled for the moment) 1535 // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1536 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1537 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1538 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1539 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1540 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1541 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1542 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1543 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1544 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1545 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1546 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1547 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1548 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1549 1550 // 'spare' characters from other romanization systems 1551 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1552 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1553 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1554 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1555 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1556 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1557 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1558 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1559 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1560 
//'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1561 1562 1563 // Japanese katakana 1564 1565 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1566 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1567 'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1568 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1569 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1570 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1571 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1572 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1573 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1574 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1575 'ッティー'=>'ttii', 1576 'ッヂィー'=>'ddii', 1577 1578 // 3 character syllables - doubled vowels 1579 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1580 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1581 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1582 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1583 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1584 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1585 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1586 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1587 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1588 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1589 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1590 
'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1591 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1592 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1593 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1594 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1595 'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1596 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1597 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1598 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1599 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1600 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1601 'ウェー'=>'wee','ウィー'=>'wii', 1602 'イェー'=>'yee', 1603 'ティー'=>'tii', 1604 'ヂィー'=>'dii', 1605 1606 // 3 character syllables - doubled consonants 1607 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1608 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1609 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1610 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1611 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1612 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1613 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1614 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1615 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1616 'ッティ'=>'tti', 1617 'ッヂィ'=>'ddi', 1618 1619 // 3 character syllables - doubled vowel and consonants 1620 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1621 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1622 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1623 
'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1624 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1625 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1626 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1627 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1628 'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1629 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1630 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1631 1632 // 2 character syllables - normal 1633 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1634 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1635 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1636 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1637 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1638 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1639 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1640 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1641 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1642 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1643 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1644 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1645 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1646 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1647 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1648 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1649 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1650 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1651 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1652 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1653 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1654 
'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1655 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1656 'ウェ'=>'we','ウィ'=>'wi', 1657 'イェ'=>'ye', 1658 'ティ'=>'ti', 1659 'ヂィ'=>'di', 1660 1661 // 2 character syllables - doubled vocal 1662 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1663 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 1664 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1665 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1666 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1667 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1668 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1669 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1670 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1671 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1672 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1673 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1674 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1675 'ワー'=>'waa','ヲー'=>'woo', 1676 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1677 'ヵー'=>'kaa','ヶー'=>'kee', 1678 // old characters 1679 'ヱー'=>'wee','ヰー'=>'wii', 1680 1681 // seperate katakana 'n' 1682 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1683 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1684 1685 // 2 character syllables - doubled consonants 1686 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1687 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1688 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1689 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1690 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1691 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1692 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1693 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1694 
'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1695 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1696 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1697 1698 // 1 character syllables 1699 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1700 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1701 'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1702 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1703 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1704 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1705 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1706 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1707 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1708 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1709 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1710 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1711 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1712 'ワ'=>'wa','ヲ'=>'wo', 1713 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1714 'ヵ'=>'ka','ヶ'=>'ke', 1715 // old characters 1716 'ヱ'=>'we','ヰ'=>'wi', 1717 1718 // convert what's left (probably only kicks in when something's missing above) 1719 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1720 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1721 1722 // special characters 1723 '・'=>'_','、'=>'_', 1724 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1725 1726 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1727 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1728 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1729 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1730 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1731 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1732 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1733 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1734 // 
'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1735 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1736 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1737 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1738 1739 // "Greeklish" 1740 'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1741 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1742 1743 // Thai 1744 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1745 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1746 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1747 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1748 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1749 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1750 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1751 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1752 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1753 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1754 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1755 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1756 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1757 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1758 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1759 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1760 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1761 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1762 1763 // Korean 1764 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1765 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1766 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1767 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1768 
'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1769 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1770); 1771 1772 1773