<?php
/**
 * UTF8 helper functions
 *
 * Every function is wrapped in a function_exists() guard so that an
 * application may provide its own implementation before this file is loaded.
 *
 * @license    LGPL 2.1 (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * check for mb_string support
 *
 * Define UTF8_NOMBSTRING before loading this file to force the pure PHP
 * fallbacks even when the mbstring extension is available.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

/**
 * Check if PREG was compiled with UTF-8 support
 *
 * Without this many of the functions below will not work, so this is a minimal requirement
 */
if(!defined('UTF8_PREGSUPPORT')){
    define('UTF8_PREGSUPPORT', (bool) @preg_match('/^.$/u', 'ñ'));
}

/**
 * Check if PREG was compiled with Unicode Property support
 *
 * This is not required for the functions below, but might be needed in a UTF-8 aware application
 */
if(!defined('UTF8_PROPERTYSUPPORT')){
    define('UTF8_PROPERTYSUPPORT', (bool) @preg_match('/^\pL$/u', 'ñ'));
}


if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
     * @param string $str
     * @return bool true if no byte above 0x7F was found
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param string $str
     * @return string $str with every byte >= 0x80 removed
     */
    function utf8_strip($str){
        $ascii = '';
        $len = strlen($str);
        for($i=0; $i<$len; $i++){
            // square brackets: the curly brace offset syntax is gone in PHP 8
            if(ord($str[$i]) <128){
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * This is a structural check only: every lead byte has to be followed by
     * the announced number of continuation bytes. Overlong forms, surrogates
     * and the obsolete 5 and 6 byte forms are accepted.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://www.php.net/manual/en/function.utf8-encode.php
     * @param string $Str
     * @return bool
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue; # 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
            else return false; # Does not match any model

            for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_basename')){
    /**
     * A locale independent basename() implementation
     *
     * works around a bug in PHP's basename() implementation
     *
     * @see basename()
     * @link   https://bugs.php.net/bug.php?id=37738
     * @param string $path     A path
     * @param string $suffix   If the name component ends in suffix this will also be cut off
     * @return string
     */
    function utf8_basename($path, $suffix=''){
        $path = trim($path,'\\/');

        // find the last separator of either kind, -1 means there is none
        $slash     = strrpos($path, '/');
        $backslash = strrpos($path, '\\');
        $rpos = max(
            ($slash === false) ? -1 : $slash,
            ($backslash === false) ? -1 : $backslash
        );
        if($rpos >= 0) $path = substr($path, $rpos+1);

        // strict comparison: with == numeric looking strings like "1e3" and
        // "1000" would be considered equal
        $suflen = strlen($suffix);
        if($suflen && (substr($path, -$suflen) === $suffix)){
            $path = substr($path, 0, -$suflen);
        }

        return $path;
    }
}

if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * Uses mb_strlen() when mbstring is available. Otherwise every byte that
     * is not a continuation byte (10xxxxxx) is counted, which equals the
     * number of characters for well formed UTF-8. The formerly used
     * utf8_decode() is deprecated as of PHP 8.2 and no longer called.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @param string $string
     * @return int number of characters
     */
    function utf8_strlen($string){
        if(UTF8_MBSTRING) return mb_strlen($string, 'UTF-8');

        $string = (string) $string;
        return strlen($string) - (int) preg_match_all('/[\x80-\xBF]/', $string, $continuations);
    }
}

if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     * @param string $str
     * @param int $offset number of UTF-8 characters offset (from left)
     * @param int $length (optional) length in UTF-8 characters from offset
     * @return mixed string or false if failure
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            if( $length === null ){
                return mb_substr($str, $offset);
            }else{
                return mb_substr($str, $offset, $length);
            }
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str; // generates E_NOTICE for PHP4 objects, but not PHP5 objects
        $offset = (int)$offset;
        if (!is_null($length)) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = utf8_strlen($str); // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^'; // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if (is_null($length)) {
            $length_pattern = '(.*)$'; // the rest of the string
        } else {

            if (!isset($strlen)) $strlen = utf8_strlen($str); // see notes
            if ($offset > $strlen) return ''; // another trivial case

            if ($length > 0) {

                $length = min($strlen-$offset, $length); // reduce any length that would go past the end of the string

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {

                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // -ve length requires ... capture everything except a group of -length characters
                // anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}

if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * Unlike substr_replace() the default $length is 0, i.e. the replacement
     * is inserted at $start without removing anything. Negative values for
     * $start and $length are counted from the end of the string, just like
     * substr_replace() does.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     * @param string $string      the UTF-8 string to work on
     * @param string $replacement the string to insert
     * @param int    $start       character offset at which to start replacing
     * @param int    $length      number of characters to replace
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        // only pay for counting the characters when a negative value needs it
        if($start < 0 || $length < 0){
            $strlen = utf8_strlen($string);
            if($start < 0)  $start  = max(0, $strlen + $start);
            if($length < 0) $length = max(0, $strlen - $start + $length);
        }

        $ret = '';
        if($start>0) $ret .= utf8_substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= utf8_substr($string, $start+$length);
        return $ret;
    }
}

if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * Every character of $charlist is taken literally, ranges are not
     * supported. When the regular expression fails (e.g. on invalid UTF-8)
     * the string is returned unchanged.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if($charlist == '') return ltrim($str);

        //quote charlist for use in a characterclass (this includes ^ - ] and \)
        $charlist = preg_quote($charlist, '/');

        $trimmed = preg_replace('/^['.$charlist.']+/u','',$str);
        return is_null($trimmed) ? $str : $trimmed;
    }
}

if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * Every character of $charlist is taken literally, ranges are not
     * supported. When the regular expression fails (e.g. on invalid UTF-8)
     * the string is returned unchanged.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if($charlist == '') return rtrim($str);

        //quote charlist for use in a characterclass (this includes ^ - ] and \)
        $charlist = preg_quote($charlist, '/');

        $trimmed = preg_replace('/['.$charlist.']+$/u','',$str);
        return is_null($trimmed) ? $str : $trimmed;
    }
}

if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     * @param  string $str
     * @param  string $charlist
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if($charlist == '') return trim($str);

        return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
    }
}

if(!function_exists('utf8_strtolower')){
    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available, the global lookup table
     * $UTF8_UPPER_TO_LOWER otherwise
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     * @param string $string
     * @return string
     */
    function utf8_strtolower($string){
        if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8');

        global $UTF8_UPPER_TO_LOWER;
        return strtr($string,$UTF8_UPPER_TO_LOWER);
    }
}

if(!function_exists('utf8_strtoupper')){
    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available, the global lookup table
     * $UTF8_LOWER_TO_UPPER otherwise
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtolower()
     * @param string $string
     * @return string
     */
    function utf8_strtoupper($string){
        if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');

        global $UTF8_LOWER_TO_UPPER;
        return strtr($string,$UTF8_LOWER_TO_UPPER);
    }
}

if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     * @param string $str
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        switch ( utf8_strlen($str) ) {
            case 0:
                return '';
            case 1:
                return utf8_strtoupper($str);
            default:
                // the match fails on invalid UTF-8, leave such input alone
                if(!preg_match('/^(.{1})(.*)$/us', $str, $matches)) return $str;
                return utf8_strtoupper($matches[1]).$matches[2];
        }
    }
}

if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @param string $str
     * @return string with first char of each word uppercase
     * @see http://www.php.net/ucwords
     */
    function utf8_ucwords($str) {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
    }

    /**
     * Callback function for preg_replace_callback call in utf8_ucwords
     * You don't need to call this yourself
     *
     * @author Harry Fuecks
     * @param  array $matches matches corresponding to a single word
     * @return string with first char of the word in uppercase
     * @see utf8_ucwords
     * @see utf8_strtoupper
     */
    function utf8_ucwords_callback($matches) {
        $leadingws = $matches[2];
        $ucfirst = utf8_strtoupper($matches[3]);
        // cut off exactly the whitespace the pattern matched. ltrim() has a
        // different idea of whitespace (it keeps form feeds) and would leave
        // some of it in place, so the wrong character got replaced.
        $word = substr($matches[0], strlen($leadingws));
        $ucword = utf8_substr_replace($word,$ucfirst,0,1);
        return $leadingws . $ucword;
    }
}

if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters. Default is to deaccent both cases ($case = 0)
     *
     * The replacements are taken from the global tables $UTF8_LOWER_ACCENTS
     * and $UTF8_UPPER_ACCENTS.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param string $string
     * @param int    $case
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        if($case <= 0){
            global $UTF8_LOWER_ACCENTS;
            $string = strtr($string,$UTF8_LOWER_ACCENTS);
        }
        if($case >= 0){
            global $UTF8_UPPER_ACCENTS;
            $string = strtr($string,$UTF8_UPPER_ACCENTS);
        }
        return $string;
    }
}

if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * The replacements are taken from the global table $UTF8_ROMANIZATION
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param string $string
     * @return string
     */
    function utf8_romanize($string){
        if(utf8_isASCII($string)) return $string; //nothing to do

        global $UTF8_ROMANIZATION;
        return strtr($string,$UTF8_ROMANIZATION);
    }
}

if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the array of
     * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string     The UTF8 string to strip of special chars
     * @param  string $repl       Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class,
     *                            it is inserted as is so the caller has to quote it)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS2;

        // quoting the table is done only once per request
        static $specials = null;
        if(is_null($specials)){
            #$specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/');
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
    }
}

if(!function_exists('utf8_strpos')){
    /**
     * This is an Unicode aware replacement for strpos
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     * @param  string  $haystack
     * @param  string  $needle
     * @param  integer $offset   character offset at which to start searching
     * @return integer|false     character position of the needle, false if it was not found
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        $comp = 0;
        $length = null;

        while (is_null($length) || $length < $offset) {
            // strpos() raises an error for offsets behind the end of the
            // string, there is nothing to find there anyway
            $start = $offset + $comp;
            if ($start > strlen($haystack))
                return false;

            $pos = strpos($haystack, $needle, $start);

            if ($pos === false)
                return false;

            // translate the byte position into a character position
            $length = utf8_strlen(substr($haystack, 0, $pos));

            // the hit is in front of the wanted character offset because the
            // byte offset fell short - search again further behind
            if ($length < $offset)
                $comp = $pos - $length;
        }

        return $length;
    }
}

if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * ASCII is passed through, code points below 0x100 become decimal
     * entities, everything else becomes a hexadecimal entity
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://www.php.net/manual/en/function.utf8-decode.php
     * @param string $str
     * @return string
     */
    function utf8_tohtml ($str) {
        $ret = '';
        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp < 0x80)
                $ret .= chr($cp);
            elseif ($cp < 0x100)
                $ret .= "&#$cp;";
            else
                $ret .= '&#x'.dechex($cp).';';
        }
        return $ret;
    }
}

if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * Convert any &#..; entity to a codepoint,
     * The entities flag defaults to only decoding numeric entities.
     * Pass HTML_ENTITIES and named entities are handled as well.
     *
     * Both kinds are decoded in a single pass and the decoded text is not
     * scanned again, so nothing gets decoded twice.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @param  string  $str      UTF-8 encoded string
     * @param  boolean $entities Flag controlling decoding of named entities.
     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
     */
    function utf8_unhtml($str, $entities=null) {
        if (is_null($entities))
            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
                                         'utf8_decode_numeric', $str);

        // the decoding table is only built when named entities are wanted
        static $decoder = null;
        if (is_null($decoder))
            $decoder = new utf8_entity_decoder();

        return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                                     array(&$decoder, 'decode'), $str);
    }
}

if(!function_exists('utf8_decode_numeric')){
    /**
     * Decodes numeric HTML entities to their correct UTF-8 characters
     *
     * Entities whose number is malformed (like &#zz;) are returned unchanged
     *
     * @param $ent array match of a numeric entity: [0] the whole entity,
     *                   [2] 'x' or 'X' for hexadecimal notation, [3] the digits
     * @return string
     */
    function utf8_decode_numeric($ent) {
        switch ($ent[2]) {
            case 'X':
            case 'x':
                if (!preg_match('/^[0-9A-Fa-f]+$/', $ent[3])) return $ent[0];
                $cp = hexdec($ent[3]);
                break;
            default:
                if (!preg_match('/^[0-9]+$/', $ent[3])) return $ent[0];
                $cp = intval($ent[3]);
                break;
        }
        return unicode_to_utf8(array($cp));
    }
}

if(!class_exists('utf8_entity_decoder')){
    /**
     * Encapsulate HTML entity decoding tables
     */
    class utf8_entity_decoder {
        /** @var array maps an entity like '&eacute;' to its UTF-8 character */
        var $table;

        /**
         * Initializes the decoding tables
         *
         * The table is requested in UTF-8 explicitly. Relying on the default
         * encoding and converting single bytes afterwards breaks every non
         * ASCII character as soon as the default encoding is a multi byte one.
         */
        function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
            $this->table = array_flip($table);
        }

        /**
         * Wrapper around unicode_to_utf8()
         *
         * Converts a single byte character to UTF-8 using its byte value as
         * code point. No longer used internally, kept for compatibility.
         *
         * @param $c string
         * @return mixed
         */
        function makeutf8($c) {
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * Decodes any HTML entity to its correct UTF-8 char equivalent
         *
         * @param $ent array match of an entity: [0] the whole entity,
         *                   [1] '#' for numeric entities
         * @return string the decoded character, unknown entities are returned unchanged
         */
        function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            } elseif (array_key_exists($ent[0],$this->table)) {
                return $this->table[$ent[0]];
            } else {
                return $ent[0];
            }
        }
    }
}

if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING. This includes a multi-octet sequence that is cut
     * off by the end of the string.
     *
     * Without $strict bad lead and continuation bytes are skipped silently
     * while illegal code points are passed through.
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param  string  $str UTF-8 encoded string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     * @see    unicode_to_utf8
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0;     // cached expected number of octets after the current octet
                         // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0;     // cached Unicode character
        $mBytes = 1;     // cached expected number of octets in the current sequence

        $out = array();

        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {

            $in = ord($str[$i]);

            if ( $mState == 0) {

                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in))) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & ($in))) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & ($in))) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & ($in))) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & ($in))) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & ($in))) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Illegal sequence identifier '.
                                'in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );
                    return false;

                }

            } else {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in))) {

                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState) {

                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                        'utf8_to_unicode: Illegal sequence or codepoint '.
                                            'in UTF-8 at byte '.$i,
                                        E_USER_WARNING
                                    );

                                return false;
                            }

                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Incomplete multi-octet '.
                            ' sequence in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );

                    return false;
                }
            }
        }

        // a sequence that is still open here was cut off by the end of the string
        if ($strict && $mState != 0) {
            trigger_error(
                    'utf8_to_unicode: Incomplete multi-octet '.
                    'sequence in UTF-8 at byte '.$len,
                    E_USER_WARNING
                );

            return false;
        }

        return $out;
    }
}

if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING.
     * Without $strict such values are skipped.
     *
     * The string is built by plain concatenation. The output buffer used
     * formerly was left open whenever the function bailed out in strict mode.
     *
     * @param  array $arr of unicode code points representing a string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed UTF-8 string or false if array contains invalid code points
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    utf8_to_unicode
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';
        $result = '';

        foreach ($arr as $k => $cp) {

            if ($cp < 0 || $cp > 0x10ffff) {
                # negative or beyond the last plane

                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Codepoint out of Unicode range '.
                            'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                        );

                    // out of range
                    return false;
                }

            } else if ($cp <= 0x007f) {
                # ASCII range (including control chars)

                $result .= chr($cp);

            } else if ($cp <= 0x07ff) {
                # 2 byte sequence

                $result .= chr(0xc0 | ($cp >> 6));
                $result .= chr(0x80 | ($cp & 0x003f));

            } else if($cp == 0xFEFF) {
                # Byte order mark (skip)

                // nop -- zap the BOM

            } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {
                # Test for illegal surrogates

                // found a surrogate
                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                            'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                        );
                    return false;
                }

            } else if ($cp <= 0xffff) {
                # 3 byte sequence

                $result .= chr(0xe0 | ($cp >> 12));
                $result .= chr(0x80 | (($cp >> 6) & 0x003f));
                $result .= chr(0x80 | ($cp & 0x003f));

            } else {
                # 4 byte sequence

                $result .= chr(0xf0 | ($cp >> 18));
                $result .= chr(0x80 | (($cp >> 12) & 0x3f));
                $result .= chr(0x80 | (($cp >> 6) & 0x3f));
                $result .= chr(0x80 | ($cp & 0x3f));
            }
        }

        return $result;
    }
}

if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Without mb_string the code points returned by utf8_to_unicode() are
     * encoded here, characters beyond the BMP as surrogate pairs. Be aware
     * that utf8_to_unicode() drops any BOM found in the input.
     *
     * @param string $str UTF-8 encoded string (passed by reference, not modified)
     * @param bool   $bom prepend a byte order mark?
     * @return string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if($cp <= 0xFFFF){
                $out .= pack('n',$cp);
            }elseif($cp <= 0x10FFFF){
                // astral plane: split into high and low surrogate
                $cp -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            }
            // anything bigger can not be expressed in UTF-16 and is dropped
        }
        return $out;
    }
}

if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * Surrogate pairs are combined into their code point. Unpaired
     * surrogates and the BOM are dropped by unicode_to_utf8().
     *
     * @param string $str UTF-16BE encoded string (passed by reference, not modified)
     * @return string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if(!is_array($units)) return '';

        $uni  = array();
        $high = null; // pending high surrogate
        foreach($units as $unit){
            if(!is_null($high)){
                if($unit >= 0xDC00 && $unit <= 0xDFFF){
                    $uni[] = 0x10000 + (($high - 0xD800) << 10) + ($unit - 0xDC00);
                    $high = null;
                    continue;
                }
                // no low surrogate followed, hand it on as it is
                $uni[] = $high;
                $high = null;
            }

            if($unit >= 0xD800 && $unit <= 0xDBFF){
                $high = $unit;
            }else{
                $uni[] = $unit;
            }
        }
        if(!is_null($high)) $uni[] = $high;

        return unicode_to_utf8($uni);
    }
}

if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * The string is walked once using anchored matches instead of cutting
     * off the processed part after every character.
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see http://www.w3.org/International/questions/qa-forms-utf-8
     * @param string $str to search
     * @param string $replace to replace bad bytes with (defaults to '', which
     *                        removes the bad bytes) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
         '([\x00-\x7F]'.                          # ASCII (including control chars)
         '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
         '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
         '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
         '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
         '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
         '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
         '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
         '|(.{1}))';                              # invalid byte

        $str    = (string) $str;
        $len    = strlen($str);
        $pos    = 0;
        $result = '';

        // A anchors each match at $pos. Every match consumes at least one byte.
        while ($pos < $len && preg_match('/'.$UTF8_BAD.'/AS', $str, $matches, 0, $pos)) {
            if (isset($matches[2]) && $matches[2] !== '') {
                // only the last alternative fills the second group
                $result .= $replace;
            } else {
                $result .= $matches[0];
            }
            $pos += strlen($matches[0]);
        }

        return $result;
    }
}

if(!function_exists('utf8_correctIdx')){
    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * @param $str   string   utf8 character string
     * @param $i     int      byte index into $str
     * @param $next  bool     direction to search for boundary,
     *                           false = up (current character)
     *                           true = down (next character)
     *
     * @return int            byte index into $str now pointing to a utf8 character boundary
     *
     * @author       chris smith <chris@jalakai.co.uk>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $limit = strlen($str);
        if ($i>=$limit) return $limit;

        // step over continuation bytes (10xxxxxx) in the wanted direction
        if ($next) {
            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
        }

        return $i;
    }
}

// only needed if no mb_string available
if(!UTF8_MBSTRING){
    /**
     * UTF-8 Case lookup table
     *
     * This lookuptable maps the lower case letters to their corresponding
     * upper case letter in UTF-8
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_LOWER_TO_UPPER;
    if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
            "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q",
            "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G",
            "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ",
            "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ",
"ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 1014 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 1015 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 1016 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 1017 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 1018 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 1019 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 1020 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 1021 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 1022 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 1023 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 1024 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 1025 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 1026 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 1027 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 1028 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 1029 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 1030 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 1031 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 1032 "ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 1033 
"ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 1034 "ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 1035 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 1036 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 1037 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 1038 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 1039 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 1040 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1041 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1042 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1043 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1044 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1045 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1046 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1047 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1048 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1049 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1050 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1051 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1052 "ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1053 
"ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1054 "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1055 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1056 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1057 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1058 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1059 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1060 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1061 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1062 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1063 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1064 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1065 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1066 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1067 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1068 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1069 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1070 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1071 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1072 "ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1073 
"á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 1074 "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 1075 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 1076 ); 1077 1078 /** 1079 * UTF-8 Case lookup table 1080 * 1081 * This lookuptable defines the lower case letters to their corresponding 1082 * upper case letter in UTF-8 1083 * 1084 * @author Andreas Gohr <andi@splitbrain.org> 1085 */ 1086 global $UTF8_UPPER_TO_LOWER; 1087 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1088 "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", 1089 "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", 1090 "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", 1091 "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", 1092 "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", 1093 "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", 1094 "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", 1095 "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", 1096 "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", 1097 "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", 1098 "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", 1099 "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ", 1100 "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ", 1101 "Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố", 1102 
"Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ", 1103 "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ", 1104 "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ", 1105 "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ", 1106 "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ", 1107 "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ", 1108 "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ", 1109 "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ", 1110 "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ", 1111 "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ", 1112 "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս", 1113 "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ", 1114 "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ", 1115 "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ", 1116 "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ", 1117 "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ", 1118 "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ", 1119 "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ", 1120 "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ", 1121 "Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ", 1122 
"Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ", 1123 "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ", 1124 "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ", 1125 "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ", 1126 "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п", 1127 "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е", 1128 "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ", 1129 "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ", 1130 "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ", 1131 "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π", 1132 "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ", 1133 "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ", 1134 "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ", 1135 "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ", 1136 "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș", 1137 "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ", 1138 "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ", 1139 "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ", 1140 "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ", 1141 "Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ", 1142 
"Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž", 1143 "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ", 1144 "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ", 1145 "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ", 1146 "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī", 1147 "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė", 1148 "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă", 1149 "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö", 1150 "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì", 1151 "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â", 1152 "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1153 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1154 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1155 ); 1156}; // end of case lookup tables 1157 1158/** 1159 * UTF-8 lookup table for lower case accented letters 1160 * 1161 * This lookuptable defines replacements for accented characters from the ASCII-7 1162 * range. This are lower case letters only. 
1163 * 1164 * @author Andreas Gohr <andi@splitbrain.org> 1165 * @see utf8_deaccent() 1166 */ 1167global $UTF8_LOWER_ACCENTS; 1168if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array( 1169 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', 1170 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', 1171 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', 1172 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', 1173 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', 1174 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', 1175 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', 1176 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', 1177 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', 1178 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', 1179 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', 1180 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', 1181 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', 1182 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', 1183 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', 1184); 1185 1186/** 1187 * UTF-8 lookup table for upper case accented letters 1188 * 1189 * This lookuptable defines replacements for accented characters from the ASCII-7 1190 * range. This are upper case letters only. 
1191 * 1192 * @author Andreas Gohr <andi@splitbrain.org> 1193 * @see utf8_deaccent() 1194 */ 1195global $UTF8_UPPER_ACCENTS; 1196if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array( 1197 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', 1198 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', 1199 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', 1200 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', 1201 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', 1202 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', 1203 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', 1204 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', 1205 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', 1206 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', 1207 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', 1208 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', 1209 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', 1210 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', 1211 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', 1212); 1213 1214/** 1215 * UTF-8 array of common special characters 1216 * 1217 * This array should contain all special characters (not a letter or digit) 1218 * defined in the various local charsets - it's not a complete list of non-alphanum 1219 * characters in UTF-8. It's not perfect but should match most cases of special 1220 * chars. 1221 * 1222 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! 
1223 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a 1224 * 1225 * @author Andreas Gohr <andi@splitbrain.org> 1226 * @see utf8_stripspecials() 1227 */ 1228global $UTF8_SPECIAL_CHARS; 1229if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( 1230 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 1231 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 1232 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 1233 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 1234 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 1235 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 1236 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 1237 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 1238 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 1239 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 1240 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 1241 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 1242 0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 1243 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 1244 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1245 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1246 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1247 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1248 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1249 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1250 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 
0x2200, 0x2202, 1251 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1252 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1253 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1254 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1255 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1256 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1257 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1258 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1259 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1260 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1261 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1262 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1263 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1264 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1265 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1266 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1267 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1268 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1269 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1270 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1271 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1272 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1273 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1274 0x27b4, 0x27b5, 0x27b6, 
0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1275 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1276 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1277 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1278 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1279 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1280 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1281 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1282 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1283 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1284 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1285 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1286 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1287 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1288 0xffeb, 0xffec, 0xffed, 0xffee, 1289 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1290 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1291 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1292 0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1293 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1294); 1295 1296// utf8 version of above data 1297global $UTF8_SPECIAL_CHARS2; 1298if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1299 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ �'. 1300 '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. 1301 '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1302 '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. 1303 '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. 1304 '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. 1305 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. 1306 '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. 1307 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. 
1308 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1309 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1310 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1311 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1312 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1313 '➷➸➹➺➻➼➽➾'. 1314 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1315 '�'. 1316 '�ﹼﹽ'. 1317 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1318 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1319 ''. 1320 ' '; 1321 1322/** 1323 * Romanization lookup table 1324 * 1325 * This lookup tables provides a way to transform strings written in a language 1326 * different from the ones based upon latin letters into plain ASCII. 1327 * 1328 * Please note: this is not a scientific transliteration table. It only works 1329 * oneway from nonlatin to ASCII and it works by simple character replacement 1330 * only. Specialities of each language are not supported. 1331 * 1332 * @author Andreas Gohr <andi@splitbrain.org> 1333 * @author Vitaly Blokhin <vitinfo@vitn.com> 1334 * @link http://www.uconv.com/translit.htm 1335 * @author Bisqwit <bisqwit@iki.fi> 1336 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1337 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1338 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1339 * @link http://www.btranslations.com/resources/romanization/korean.asp 1340 * @author Arthit Suriyawongkul <arthit@gmail.com> 1341 * @author Denis Scheither <amorphis@uni-bremen.de> 1342 * @author Eivind Morland <eivind.morland@gmail.com> 1343 */ 1344global $UTF8_ROMANIZATION; 1345if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1346 // scandinavian - differs from what we do in deaccent 1347 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1348 1349 //russian cyrillic 1350 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1351 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1352 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1353 
'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1354 'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1355 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1356 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1357 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1358 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1359 // Ukrainian cyrillic 1360 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1361 // Georgian 1362 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1363 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1364 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1365 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1366 'ჰ'=>'xh', 1367 //Sanskrit 1368 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1369 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1370 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1371 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1372 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1373 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1374 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1375 //Sanskrit diacritics 1376 'Ā'=>'A','Ī'=>'I','Ū'=>'U','Ṛ'=>'R','Ṝ'=>'R','Ṅ'=>'N','Ñ'=>'N','Ṭ'=>'T', 1377 'Ḍ'=>'D','Ṇ'=>'N','Ś'=>'S','Ṣ'=>'S','Ṁ'=>'M','Ṃ'=>'M','Ḥ'=>'H','Ḷ'=>'L','Ḹ'=>'L', 1378 'ā'=>'a','ī'=>'i','ū'=>'u','ṛ'=>'r','ṝ'=>'r','ṅ'=>'n','ñ'=>'n','ṭ'=>'t', 1379 'ḍ'=>'d','ṇ'=>'n','ś'=>'s','ṣ'=>'s','ṁ'=>'m','ṃ'=>'m','ḥ'=>'h','ḷ'=>'l','ḹ'=>'l', 1380 //Hebrew 1381 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1382 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1383 
'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1384 'ש'=>'sh','ת'=>'t', 1385 //Arabic 1386 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1387 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1388 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1389 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1390 1391 // Japanese characters (last update: 2008-05-09) 1392 1393 // Japanese hiragana 1394 1395 // 3 character syllables, っ doubles the consonant after 1396 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1397 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1398 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1399 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1400 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1401 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1402 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1403 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1404 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1405 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1406 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1407 1408 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1409 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1410 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1411 1412 // 2 character syllables - normal 1413 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1414 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1415 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1416 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1417 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1418 
'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1419 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1420 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1421 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1422 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1423 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1424 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1425 'うぇ'=>'we','うぃ'=>'wi', 1426 'いぇ'=>'ye', 1427 1428 // 2 character syllables, っ doubles the consonant after 1429 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1430 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1431 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1432 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1433 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1434 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1435 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1436 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1437 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1438 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1439 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1440 1441 // 1 character syllabels 1442 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1443 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1444 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1445 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1446 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1447 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1448 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1449 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1450 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1451 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1452 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1453 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1454 
'わ'=>'wa','を'=>'wo', 1455 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1456 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1457 // old characters 1458 'ゑ'=>'we','ゐ'=>'wi', 1459 1460 // convert what's left (probably only kicks in when something's missing above) 1461 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1462 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1463 1464 // never seen one of those (disabled for the moment) 1465 // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1466 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1467 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1468 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1469 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1470 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1471 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1472 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1473 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1474 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1475 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1476 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1477 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1478 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1479 1480 // 'spare' characters from other romanization systems 1481 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1482 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1483 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1484 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1485 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1486 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1487 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1488 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1489 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1490 
//'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1491 1492 1493 // Japanese katakana 1494 1495 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1496 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1497 'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1498 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1499 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1500 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1501 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1502 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1503 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1504 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1505 'ッティー'=>'ttii', 1506 'ッヂィー'=>'ddii', 1507 1508 // 3 character syllables - doubled vowels 1509 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1510 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1511 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1512 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1513 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1514 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1515 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1516 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1517 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1518 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1519 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1520 
'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1521 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1522 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1523 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1524 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1525 'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1526 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1527 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1528 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1529 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1530 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1531 'ウェー'=>'wee','ウィー'=>'wii', 1532 'イェー'=>'yee', 1533 'ティー'=>'tii', 1534 'ヂィー'=>'dii', 1535 1536 // 3 character syllables - doubled consonants 1537 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1538 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1539 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1540 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1541 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1542 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1543 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1544 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1545 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1546 'ッティ'=>'tti', 1547 'ッヂィ'=>'ddi', 1548 1549 // 3 character syllables - doubled vowel and consonants 1550 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1551 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1552 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1553 
'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1554 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1555 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1556 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1557 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1558 'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1559 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1560 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1561 1562 // 2 character syllables - normal 1563 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1564 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1565 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1566 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1567 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1568 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1569 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1570 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1571 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1572 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1573 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1574 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1575 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1576 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1577 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1578 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1579 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1580 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1581 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1582 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1583 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1584 
'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1585 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1586 'ウェ'=>'we','ウィ'=>'wi', 1587 'イェ'=>'ye', 1588 'ティ'=>'ti', 1589 'ヂィ'=>'di', 1590 1591 // 2 character syllables - doubled vocal 1592 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1593 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 1594 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1595 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1596 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1597 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1598 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1599 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1600 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1601 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1602 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1603 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1604 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1605 'ワー'=>'waa','ヲー'=>'woo', 1606 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1607 'ヵー'=>'kaa','ヶー'=>'kee', 1608 // old characters 1609 'ヱー'=>'wee','ヰー'=>'wii', 1610 1611 // seperate katakana 'n' 1612 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1613 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1614 1615 // 2 character syllables - doubled consonants 1616 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1617 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1618 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1619 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1620 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1621 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1622 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1623 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1624 
'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1625 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1626 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1627 1628 // 1 character syllables 1629 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1630 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1631 'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1632 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1633 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1634 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1635 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1636 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1637 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1638 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1639 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1640 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1641 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1642 'ワ'=>'wa','ヲ'=>'wo', 1643 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1644 'ヵ'=>'ka','ヶ'=>'ke', 1645 // old characters 1646 'ヱ'=>'we','ヰ'=>'wi', 1647 1648 // convert what's left (probably only kicks in when something's missing above) 1649 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1650 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1651 1652 // special characters 1653 '・'=>'_','、'=>'_', 1654 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1655 1656 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1657 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1658 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1659 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1660 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1661 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1662 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1663 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1664 // 
'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1665 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1666 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1667 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1668 1669 // "Greeklish" 1670 'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1671 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1672 1673 // Thai 1674 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1675 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1676 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1677 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1678 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1679 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1680 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1681 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1682 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1683 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1684 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1685 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1686 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1687 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1688 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1689 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1690 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1691 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1692 1693 // Korean 1694 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1695 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1696 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1697 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1698 
'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1699 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1700); 1701 1702 1703