<?php
/**
 * UTF8 helper functions
 *
 * Every function is wrapped in a function_exists() guard so that a definition
 * loaded earlier takes precedence over the one in this file.
 *
 * @license    LGPL (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * check for mb_string support
 *
 * UTF8_MBSTRING becomes 1 when mb_substr() exists and UTF8_NOMBSTRING has not
 * been defined, 0 otherwise. A value defined before this file is loaded wins.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <netzmeister@andreas-haerter.de>
     * @param  string $str
     * @return bool   true when no byte outside 0x00-0x7F was matched
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $str
     * @return string the bytes of $str that are below 0x80, in order
     */
    function utf8_strip($str){
        $ascii = '';
        $len   = strlen($str);
        for($i=0; $i<$len; $i++){
            // square brackets: the curly brace offset syntax is gone in PHP 8
            if(ord($str[$i]) < 128){
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * Only the lead-byte/continuation-byte structure is checked. Sequences of
     * up to six bytes are accepted; overlong forms and surrogates are not
     * rejected here.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://www.php.net/manual/en/function.utf8-encode.php
     * @param  string $Str
     * @return bool
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue;                 # 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1;       # 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2;       # 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3;       # 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4;       # 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5;       # 1111110b
            else return false;                       # Does not match any model

            for ($j=0; $j<$n; $j++) {                # n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * utf8_decode() converts characters that are not in ISO-8859-1
     * to '?', which, for the purpose of counting, is alright - It's
     * even faster than mb_strlen.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @see    utf8_decode()
     * @param  string $string
     * @return int    number of characters
     */
    function utf8_strlen($string){
        return strlen(utf8_decode($string));
    }
}

if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     * @param  string  $str
     * @param  integer $offset number of UTF-8 characters offset (from left)
     * @param  integer $length (optional) length in UTF-8 characters from offset
     * @return mixed   string or false if failure
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            if( $length === null ){
                return mb_substr($str, $offset);
            }else{
                return mb_substr($str, $offset, $length);
            }
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
        $offset = (int)$offset;
        if (!is_null($length)) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = strlen(utf8_decode($str));        // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if (is_null($length)) {
            $length_pattern = '(.*)$';                  // the rest of the string
        } else {

            if (!isset($strlen)) $strlen = strlen(utf8_decode($str));    // see notes
            if ($offset > $strlen) return '';           // another trivial case

            if ($length > 0) {

                $length = min($strlen-$offset, $length);  // reduce any length that would go past the end of the string

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {

                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}

if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * Keeps the first $start characters (only when $start is positive),
     * appends $replacement, then appends everything from character
     * $start+$length onwards.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     * @param  string $string
     * @param  string $replacement
     * @param  int    $start
     * @param  int    $length number of characters to replace
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        $ret = '';
        if($start>0) $ret .= utf8_substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= utf8_substr($string, $start+$length);
        return $ret;
    }
}

if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * Without a charlist this simply delegates to ltrim().
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     * @param  string $str
     * @param  string $charlist characters to strip
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if($charlist == '') return ltrim($str);

        //quote charlist for use in a characterclass
        $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist);

        return preg_replace('/^['.$charlist.']+/u','',$str);
    }
}

if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * Without a charlist this simply delegates to rtrim().
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     * @param  string $str
     * @param  string $charlist characters to strip
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if($charlist == '') return rtrim($str);

        //quote charlist for use in a characterclass
        $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist);

        return preg_replace('/['.$charlist.']+$/u','',$str);
    }
}

if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     * @param  string $str
     * @param  string $charlist characters to strip
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if($charlist == '') return trim($str);

        return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
    }
}

if(!function_exists('utf8_strtolower')){
    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available, otherwise the
     * $UTF8_UPPER_TO_LOWER lookup table.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     * @param  string $string
     * @return string
     */
    function utf8_strtolower($string){
        if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8');

        global $UTF8_UPPER_TO_LOWER;
        return strtr($string,$UTF8_UPPER_TO_LOWER);
    }
}

if(!function_exists('utf8_strtoupper')){
    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available, otherwise the
     * $UTF8_LOWER_TO_UPPER lookup table.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtolower()
     * @param  string $string
     * @return string
     */
    function utf8_strtoupper($string){
        if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');

        global $UTF8_LOWER_TO_UPPER;
        return strtr($string,$UTF8_LOWER_TO_UPPER);
    }
}

if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        switch ( utf8_strlen($str) ) {
            case 0:
                return '';
            case 1:
                return utf8_strtoupper($str);
            default:
                // split into first character and remainder
                preg_match('/^(.{1})(.*)$/us', $str, $matches);
                return utf8_strtoupper($matches[1]).$matches[2];
        }
    }
}

if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first char of each word uppercase
     * @see    http://www.php.net/ucwords
     */
    function utf8_ucwords($str) {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
    }

    /**
     * Callback function for preg_replace_callback call in utf8_ucwords
     * You don't need to call this yourself
     *
     * @author Harry Fuecks
     * @param  array $matches matches corresponding to a single word
     *                        ([2] leading whitespace, [3] first character)
     * @return string with first char of the word in uppercase
     * @see    utf8_ucwords
     * @see    utf8_strtoupper
     */
    function utf8_ucwords_callback($matches) {
        $leadingws = $matches[2];
        $ucfirst = utf8_strtoupper($matches[3]);
        $ucword = utf8_substr_replace(ltrim($matches[0]),$ucfirst,0,1);
        return $leadingws . $ucword;
    }
}

if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters. Default is to deaccent both cases ($case = 0)
     *
     * The replacements come from the global tables $UTF8_LOWER_ACCENTS and
     * $UTF8_UPPER_ACCENTS.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @param  int    $case
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        if($case <= 0){
            global $UTF8_LOWER_ACCENTS;
            $string = strtr($string,$UTF8_LOWER_ACCENTS);
        }
        if($case >= 0){
            global $UTF8_UPPER_ACCENTS;
            $string = strtr($string,$UTF8_UPPER_ACCENTS);
        }
        return $string;
    }
}

if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * Pure ASCII input is returned untouched; anything else is run through
     * the global $UTF8_ROMANIZATION table.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @return string
     */
    function utf8_romanize($string){
        if(utf8_isASCII($string)) return $string; //nothing to do

        global $UTF8_ROMANIZATION;
        return strtr($string,$UTF8_ROMANIZATION);
    }
}

if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the set of
     * stripped chars (in addition to those in $UTF8_SPECIAL_CHARS2)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string     The UTF8 string to strip of special chars
     * @param  string $repl       Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class,
     *                            inserted unescaped)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS2;

        // quote the special chars only once per request
        static $specials = null;
        if(is_null($specials)){
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
    }
}

if(!function_exists('utf8_strpos')){
    /**
     * This is an Unicode aware replacement for strpos
     *
     * The search is done bytewise with strpos(); the byte position is then
     * converted to a character position. While the hit lies before the
     * requested character offset, the search is repeated further right.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     * @param  string  $haystack
     * @param  string  $needle
     * @param  integer $offset in characters
     * @return integer|false character position, false when not found
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        $comp = 0;
        $length = null;

        while (is_null($length) || $length < $offset) {
            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false)
                return false;

            $length = utf8_strlen(substr($haystack, 0, $pos));

            if ($length < $offset)
                $comp = $pos - $length;
        }

        return $length;
    }
}

if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * Code points below 0x80 are emitted as-is, below 0x100 as decimal
     * entities and everything else as hexadecimal entities.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://www.php.net/manual/en/function.utf8-decode.php
     * @param  string $str
     * @return string
     */
    function utf8_tohtml ($str) {
        $ret = '';
        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp < 0x80)
                $ret .= chr($cp);
            elseif ($cp < 0x100)
                $ret .= "&#$cp;";
            else
                $ret .= '&#x'.dechex($cp).';';
        }
        return $ret;
    }
}

if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * Convert any &#..; entity to a codepoint,
     * The entities flag defaults to only decoding numeric entities.
     * Pass HTML_ENTITIES and named entities, including & < etc.
     * are handled as well. Avoids the problem that would occur if you
     * had to decode "&#38;&amp;#38;"
     *
     * unhtmlspecialchars(utf8_unhtml($s)) -> "&&"
     * utf8_unhtml(unhtmlspecialchars($s)) -> "&&#38;"
     * what it should be                   -> "&&#38;"
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @param  string  $str      UTF-8 encoded string
     * @param  boolean $entities Flag controlling decoding of named entities
     *                           (any non-null value enables it).
     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
     */
    function utf8_unhtml($str, $entities=null) {
        static $decoder = null;

        if (is_null($entities))
            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
                                         'utf8_decode_numeric', $str);

        // the entity table is only built when named entities are requested
        if (is_null($decoder))
            $decoder = new utf8_entity_decoder();

        return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                                     array($decoder, 'decode'), $str);
    }
}

if(!function_exists('utf8_decode_numeric')){
    /**
     * Callback turning one numeric entity match into its UTF-8 character
     *
     * @param  array $ent regexp match: [2] is 'x'/'X' for hexadecimal
     *                    entities, [3] holds the digits
     * @return string
     */
    function utf8_decode_numeric($ent) {
        switch ($ent[2]) {
            case 'X':
            case 'x':
                $cp = hexdec($ent[3]);
                break;
            default:
                $cp = intval($ent[3]);
                break;
        }
        return unicode_to_utf8(array($cp));
    }
}

if(!class_exists('utf8_entity_decoder')){
    /**
     * Decodes numeric and named HTML entities, used by utf8_unhtml()
     *
     * The lookup table maps each named entity (e.g. "&amp;") to its
     * UTF-8 encoded character.
     */
    class utf8_entity_decoder {
        var $table;

        /**
         * Builds the entity => UTF-8 character table
         *
         * A real __construct() is required: PHP 8 no longer treats a method
         * named like the class as constructor, which left $table unset.
         */
        function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES);
            $table = array_flip($table);
            $this->table = array_map(array($this,'makeutf8'), $table);
        }

        /**
         * Old style constructor name, kept so explicit calls keep working
         */
        function utf8_entity_decoder() {
            $this->__construct();
        }

        /**
         * Converts one character of the translation table to UTF-8
         *
         * @param  string $c
         * @return string
         */
        function makeutf8($c) {
            // A multi-byte entry cannot be a single ISO-8859-1 character; it is
            // assumed to be UTF-8 already (what get_html_translation_table()
            // returns when its default charset is UTF-8) and is passed through.
            if (strlen($c) > 1) return $c;
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * preg_replace_callback handler
         *
         * @param  array $ent regexp match: [0] whole entity, [1] '#' for
         *                    numeric entities
         * @return string decoded character, or the entity itself when unknown
         */
        function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            } elseif (array_key_exists($ent[0],$this->table)) {
                return $this->table[$ent[0]];
            } else {
                return $ent[0];
            }
        }
    }
}

if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param  string  $str    UTF-8 encoded string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     * @see    unicode_to_utf8
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0;     // cached expected number of octets after the current octet
                         // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0;     // cached Unicode character
        $mBytes = 1;     // cached expected number of octets in the current sequence

        $out = array();

        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {

            // square brackets: the curly brace offset syntax is gone in PHP 8
            $in = ord($str[$i]);

            if ( $mState == 0) {

                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in))) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & ($in))) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & ($in))) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & ($in))) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & ($in))) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & ($in))) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Illegal sequence identifier '.
                                'in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );
                    return false;

                }

            } else {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in))) {

                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState) {

                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                        'utf8_to_unicode: Illegal sequence or codepoint '.
                                            'in UTF-8 at byte '.$i,
                                        E_USER_WARNING
                                    );

                                return false;
                            }

                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Incomplete multi-octet '.
                            '   sequence in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );

                    return false;
                }
            }
        }
        return $out;
    }
}

if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING.
     * Without $strict such values are silently skipped.
     *
     * The result is built by plain string concatenation; no output buffer is
     * opened, so an early return in strict mode cannot leave a buffer behind.
     *
     * @param  array   $arr    unicode code points representing a string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed UTF-8 string or false if array contains invalid code points
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    utf8_to_unicode
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';
        $utf8 = '';

        foreach (array_keys($arr) as $k) {
            $cp = $arr[$k];

            if ($cp < 0 || $cp > 0x10ffff) {
                # negative values and values above the Unicode range

                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Codepoint out of Unicode range '.
                        'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                        );

                    // out of range
                    return false;
                }

            } else if ($cp <= 0x007f) {
                # ASCII range (including control chars)

                $utf8 .= chr($cp);

            } else if ($cp <= 0x07ff) {
                # 2 byte sequence

                $utf8 .= chr(0xc0 | ($cp >> 6));
                $utf8 .= chr(0x80 | ($cp & 0x003f));

            } else if($cp == 0xFEFF) {
                # Byte order mark (skip)

                // nop -- zap the BOM

            } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {
                # Test for illegal surrogates

                // found a surrogate
                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                        'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                        );
                    return false;
                }

            } else if ($cp <= 0xffff) {
                # 3 byte sequence

                $utf8 .= chr(0xe0 | ($cp >> 12));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x003f));
                $utf8 .= chr(0x80 | ($cp & 0x003f));

            } else {
                # 4 byte sequence (0x10000 - 0x10ffff)

                $utf8 .= chr(0xf0 | ($cp >> 18));
                $utf8 .= chr(0x80 | (($cp >> 12) & 0x3f));
                $utf8 .= chr(0x80 | (($cp >> 6) & 0x3f));
                $utf8 .= chr(0x80 | ($cp & 0x3f));
            }
        }

        return $utf8;
    }
}

if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_convert_encoding() when mbstring is available. The fallback
     * converts via utf8_to_unicode() and writes code points above U+FFFF as
     * surrogate pairs.
     *
     * @param  string $str UTF-8 string (passed by reference, not modified)
     * @param  bool   $bom prepend the big endian byte order mark?
     * @return string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if($cp > 0xFFFF && $cp <= 0x10FFFF){
                // supplementary plane: split into high and low surrogate
                $cp  -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            }else{
                $out .= pack('n',$cp);
            }
        }
        return $out;
    }
}

if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * Surrogate pairs are combined into their supplementary code point
     * before encoding. Unpaired surrogates are handed to unicode_to_utf8()
     * unchanged, which skips them.
     *
     * @param  string $str UTF-16BE string without BOM (passed by reference, not modified)
     * @return string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if(!is_array($units)) return '';

        $uni  = array();
        $high = null;    // high surrogate still waiting for its partner
        foreach($units as $unit){
            if($high !== null){
                if($unit >= 0xDC00 && $unit <= 0xDFFF){
                    $uni[] = 0x10000 + (($high - 0xD800) << 10) + ($unit - 0xDC00);
                    $high  = null;
                    continue;
                }
                // no low surrogate followed
                $uni[] = $high;
                $high  = null;
            }

            if($unit >= 0xD800 && $unit <= 0xDBFF){
                $high = $unit;
            }else{
                $uni[] = $unit;
            }
        }
        if($high !== null) $uni[] = $high;

        return unicode_to_utf8($uni);
    }
}

if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    http://www.w3.org/International/questions/qa-forms-utf-8
     * @param  string $str     to search
     * @param  string $replace to replace bad bytes with (defaults to '',
     *                         i.e. bad bytes are removed) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
         '([\x00-\x7F]'.                          # ASCII (including control chars)
         '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
         '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
         '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
         '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
         '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
         '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
         '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
         '|(.{1}))';                              # invalid byte

        $str    = (string)$str;
        $result = '';
        $pos    = 0;
        $len    = strlen($str);

        // walk the string by offset instead of cutting the head off with
        // substr() after every match
        while ($pos < $len && preg_match('/'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
            if ( !isset($matches[2])) {
                $result .= $matches[0];     // valid character, keep it
            } else {
                $result .= $replace;        // second group only matches a bad byte
            }
            $pos += strlen($matches[0]);
        }
        return $result;
    }
}

if(!function_exists('utf8_correctIdx')){
    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * @param $str   string   utf8 character string
     * @param $i     int      byte index into $str
     * @param $next  bool     direction to search for boundary,
     *                           false = up (current character)
     *                           true = down (next character)
     *
     * @return int            byte index into $str now pointing to a utf8 character boundary
     *
     * @author       chris smith <chris@jalakai.co.uk>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $limit = strlen($str);
        if ($i>=$limit) return $limit;

        // step over continuation bytes (10bbbbbb) in the requested direction
        if ($next) {
            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
        }

        return $i;
    }
}

// only needed if no mb_string available
if(!UTF8_MBSTRING){
    /**
     * UTF-8 Case lookup table
     *
     * This lookuptable maps the lower case letters to their corresponding
     * upper case letter in UTF-8
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_LOWER_TO_UPPER;
    if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
            "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q",
            "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G",
            "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ",
            "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ",
"ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 936 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 937 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 938 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 939 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 940 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 941 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 942 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 943 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 944 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 945 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 946 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 947 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 948 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 949 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 950 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 951 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 952 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 953 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 954 "ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 955 "ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 956 
"ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 957 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 958 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 959 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 960 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 961 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 962 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 963 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 964 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 965 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 966 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 967 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 968 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 969 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 970 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 971 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 972 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 973 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 974 "ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 975 "ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 976 "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 977 
"ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 978 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 979 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 980 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 981 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 982 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 983 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 984 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 985 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 986 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 987 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 988 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 989 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 990 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 991 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 992 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 993 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 994 "ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 995 "á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 996 "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 997 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 998 ); 
999 1000 /** 1001 * UTF-8 Case lookup table 1002 * 1003 * This lookuptable defines the lower case letters to their correspponding 1004 * upper case letter in UTF-8 1005 * 1006 * @author Andreas Gohr <andi@splitbrain.org> 1007 */ 1008 global $UTF8_UPPER_TO_LOWER; 1009 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1010 "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", 1011 "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", 1012 "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", 1013 "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", 1014 "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", 1015 "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", 1016 "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", 1017 "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", 1018 "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", 1019 "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", 1020 "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", 1021 "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ", 1022 "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ", 1023 "Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố", 1024 "Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ", 1025 "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ", 1026 "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ", 1027 
"Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ", 1028 "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ", 1029 "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ", 1030 "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ", 1031 "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ", 1032 "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ", 1033 "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ", 1034 "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս", 1035 "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ", 1036 "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ", 1037 "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ", 1038 "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ", 1039 "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ", 1040 "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ", 1041 "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ", 1042 "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ", 1043 "Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ", 1044 "Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ", 1045 "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ", 1046 "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ", 1047 
"Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ", 1048 "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п", 1049 "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е", 1050 "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ", 1051 "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ", 1052 "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ", 1053 "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π", 1054 "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ", 1055 "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ", 1056 "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ", 1057 "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ", 1058 "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș", 1059 "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ", 1060 "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ", 1061 "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ", 1062 "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ", 1063 "Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ", 1064 "Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž", 1065 "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ", 1066 "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ", 1067 
"Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ", 1068 "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī", 1069 "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė", 1070 "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă", 1071 "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö", 1072 "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì", 1073 "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â", 1074 "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1075 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1076 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1077 ); 1078}; // end of case lookup tables 1079 1080/** 1081 * UTF-8 lookup table for lower case accented letters 1082 * 1083 * This lookuptable defines replacements for accented characters from the ASCII-7 1084 * range. This are lower case letters only. 
1085 * 1086 * @author Andreas Gohr <andi@splitbrain.org> 1087 * @see utf8_deaccent() 1088 */ 1089global $UTF8_LOWER_ACCENTS; 1090if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array( 1091 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', 1092 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', 1093 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', 1094 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', 1095 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', 1096 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', 1097 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', 1098 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', 1099 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', 1100 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', 1101 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', 1102 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', 1103 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', 1104 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', 1105 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', 1106); 1107 1108/** 1109 * UTF-8 lookup table for upper case accented letters 1110 * 1111 * This lookuptable defines replacements for accented characters from the ASCII-7 1112 * range. This are upper case letters only. 
1113 * 1114 * @author Andreas Gohr <andi@splitbrain.org> 1115 * @see utf8_deaccent() 1116 */ 1117global $UTF8_UPPER_ACCENTS; 1118if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array( 1119 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', 1120 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', 1121 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', 1122 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', 1123 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', 1124 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', 1125 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', 1126 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', 1127 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', 1128 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', 1129 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', 1130 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', 1131 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', 1132 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', 1133 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', 1134); 1135 1136/** 1137 * UTF-8 array of common special characters 1138 * 1139 * This array should contain all special characters (not a letter or digit) 1140 * defined in the various local charsets - it's not a complete list of non-alphanum 1141 * characters in UTF-8. It's not perfect but should match most cases of special 1142 * chars. 1143 * 1144 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! 
1145 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a 1146 * 1147 * @author Andreas Gohr <andi@splitbrain.org> 1148 * @see utf8_stripspecials() 1149 */ 1150global $UTF8_SPECIAL_CHARS; 1151if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( 1152 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 1153 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 1154 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 1155 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 1156 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 1157 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 1158 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 1159 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 1160 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 1161 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 1162 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 1163 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 1164 0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 1165 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 1166 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1167 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1168 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1169 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1170 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1171 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1172 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 
0x2200, 0x2202, 1173 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1174 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1175 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1176 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1177 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1178 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1179 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1180 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1181 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1182 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1183 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1184 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1185 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1186 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1187 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1188 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1189 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1190 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1191 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1192 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1193 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1194 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1195 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1196 0x27b4, 0x27b5, 0x27b6, 
0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1197 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1198 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1199 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1200 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1201 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1202 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1203 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1204 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1205 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1206 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1207 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1208 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1209 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1210 0xffeb, 0xffec, 0xffed, 0xffee, 1211 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1212 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1213 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1214 0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1215 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1216); 1217 1218// utf8 version of above data 1219global $UTF8_SPECIAL_CHARS2; 1220if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1221 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ �'. 1222 '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. 1223 '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1224 '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. 1225 '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. 1226 '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. 1227 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. 1228 '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. 1229 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. 
1230 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1231 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1232 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1233 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1234 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1235 '➷➸➹➺➻➼➽➾'. 1236 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1237 '�'. 1238 '�ﹼﹽ'. 1239 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1240 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1241 ''. 1242 ' '; 1243 1244/** 1245 * Romanization lookup table 1246 * 1247 * This lookup tables provides a way to transform strings written in a language 1248 * different from the ones based upon latin letters into plain ASCII. 1249 * 1250 * Please note: this is not a scientific transliteration table. It only works 1251 * oneway from nonlatin to ASCII and it works by simple character replacement 1252 * only. Specialities of each language are not supported. 1253 * 1254 * @author Andreas Gohr <andi@splitbrain.org> 1255 * @author Vitaly Blokhin <vitinfo@vitn.com> 1256 * @link http://www.uconv.com/translit.htm 1257 * @author Bisqwit <bisqwit@iki.fi> 1258 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1259 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1260 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1261 * @link http://www.btranslations.com/resources/romanization/korean.asp 1262 * @author Arthit Suriyawongkul <arthit@gmail.com> 1263 * @author Denis Scheither <amorphis@uni-bremen.de> 1264 */ 1265global $UTF8_ROMANIZATION; 1266if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1267 // scandinavian - differs from what we do in deaccent 1268 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1269 1270 //russian cyrillic 1271 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1272 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1273 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1274 'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1275 
'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1276 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1277 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1278 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1279 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1280 // Ukrainian cyrillic 1281 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1282 // Georgian 1283 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1284 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1285 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1286 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1287 'ჰ'=>'xh', 1288 //Sanskrit 1289 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1290 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1291 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1292 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1293 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1294 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1295 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1296 //Hebrew 1297 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1298 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1299 'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1300 'ש'=>'sh','ת'=>'t', 1301 //Arabic 1302 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1303 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1304 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1305 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1306 1307 // Japanese characters (last update: 2008-05-09) 1308 
1309 // Japanese hiragana 1310 1311 // 3 character syllables, っ doubles the consonant after 1312 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1313 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1314 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1315 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1316 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1317 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1318 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1319 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1320 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1321 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1322 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1323 1324 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1325 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1326 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1327 1328 // 2 character syllables - normal 1329 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1330 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1331 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1332 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1333 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1334 'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1335 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1336 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1337 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1338 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1339 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1340 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1341 'うぇ'=>'we','うぃ'=>'wi', 
1342 'いぇ'=>'ye', 1343 1344 // 2 character syllables, っ doubles the consonant after 1345 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1346 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1347 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1348 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1349 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1350 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1351 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1352 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1353 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1354 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1355 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1356 1357 // 1 character syllabels 1358 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1359 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1360 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1361 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1362 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1363 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1364 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1365 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1366 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1367 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1368 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1369 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1370 'わ'=>'wa','を'=>'wo', 1371 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1372 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1373 // old characters 1374 'ゑ'=>'we','ゐ'=>'wi', 1375 1376 // convert what's left (probably only kicks in when something's missing above) 1377 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1378 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1379 1380 // never seen one of those (disabled for the moment) 1381 // 
'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1382 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1383 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1384 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1385 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1386 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1387 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1388 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1389 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1390 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1391 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1392 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1393 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1394 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1395 1396 // 'spare' characters from other romanization systems 1397 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1398 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1399 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1400 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1401 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1402 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1403 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1404 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1405 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1406 //'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1407 1408 1409 // Japanese katakana 1410 1411 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1412 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1413 
'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1414 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1415 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1416 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1417 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1418 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1419 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1420 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1421 'ッティー'=>'ttii', 1422 'ッヂィー'=>'ddii', 1423 1424 // 3 character syllables - doubled vowels 1425 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1426 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1427 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1428 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1429 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1430 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1431 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1432 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1433 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1434 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1435 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1436 'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1437 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1438 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1439 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1440 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1441 
'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1442 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1443 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1444 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1445 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1446 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1447 'ウェー'=>'wee','ウィー'=>'wii', 1448 'イェー'=>'yee', 1449 'ティー'=>'tii', 1450 'ヂィー'=>'dii', 1451 1452 // 3 character syllables - doubled consonants 1453 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1454 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1455 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1456 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1457 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1458 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1459 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1460 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1461 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1462 'ッティ'=>'tti', 1463 'ッヂィ'=>'ddi', 1464 1465 // 3 character syllables - doubled vowel and consonants 1466 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1467 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1468 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1469 'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1470 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1471 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1472 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1473 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1474 
'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1475 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1476 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1477 1478 // 2 character syllables - normal 1479 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1480 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1481 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1482 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1483 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1484 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1485 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1486 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1487 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1488 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1489 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1490 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1491 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1492 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1493 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1494 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1495 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1496 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1497 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1498 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1499 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1500 'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1501 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1502 'ウェ'=>'we','ウィ'=>'wi', 1503 'イェ'=>'ye', 1504 'ティ'=>'ti', 1505 'ヂィ'=>'di', 1506 1507 // 2 character syllables - doubled vocal 1508 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1509 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 
1510 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1511 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1512 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1513 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1514 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1515 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1516 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1517 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1518 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1519 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1520 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1521 'ワー'=>'waa','ヲー'=>'woo', 1522 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1523 'ヵー'=>'kaa','ヶー'=>'kee', 1524 // old characters 1525 'ヱー'=>'wee','ヰー'=>'wii', 1526 1527 // seperate katakana 'n' 1528 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1529 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1530 1531 // 2 character syllables - doubled consonants 1532 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1533 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1534 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1535 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1536 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1537 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1538 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1539 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1540 'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1541 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1542 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1543 1544 // 1 character syllables 1545 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1546 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1547 
'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1548 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1549 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1550 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1551 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1552 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1553 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1554 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1555 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1556 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1557 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1558 'ワ'=>'wa','ヲ'=>'wo', 1559 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1560 'ヵ'=>'ka','ヶ'=>'ke', 1561 // old characters 1562 'ヱ'=>'we','ヰ'=>'wi', 1563 1564 // convert what's left (probably only kicks in when something's missing above) 1565 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1566 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1567 1568 // special characters 1569 '・'=>'_','、'=>'_', 1570 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1571 1572 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1573 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1574 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1575 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1576 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1577 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1578 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1579 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1580 // 'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1581 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1582 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1583 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1584 1585 // "Greeklish" 1586 
'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1587 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1588 1589 // Thai 1590 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1591 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1592 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1593 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1594 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1595 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1596 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1597 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1598 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1599 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1600 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1601 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1602 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1603 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1604 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1605 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1606 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1607 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1608 1609 // Korean 1610 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1611 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1612 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1613 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1614 'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1615 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1616); 1617 1618 1619