<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * Check for mb_string support.
 *
 * UTF8_MBSTRING is 1 when the mbstring functions exist and the caller did
 * not opt out by defining UTF8_NOMBSTRING, 0 otherwise. A value defined by
 * the caller beforehand is left untouched.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_encodeFN')){
    /**
     * URL-Encode a filename to allow unicode characters
     *
     * Slashes are not encoded
     *
     * When the second parameter is true the string will
     * be encoded only if non ASCII characters are detected -
     * This makes it safe to run it multiple times on the
     * same string (default is true)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    urlencode
     * @param  string  $file filename to encode
     * @param  boolean $safe skip encoding when $file only holds [a-zA-Z0-9/_\-.%]
     * @return string
     */
    function utf8_encodeFN($file,$safe=true){
        if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){
            return $file;
        }
        $file = urlencode($file);
        // urlencode() also escapes the path separator, undo that
        $file = str_replace('%2F','/',$file);
        return $file;
    }
}

if(!function_exists('utf8_decodeFN')){
    /**
     * URL-Decode a filename
     *
     * This is just a wrapper around urldecode
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    urldecode
     * @param  string $file
     * @return string
     */
    function utf8_decodeFN($file){
        $file = urldecode($file);
        return $file;
    }
}

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Haerter <netzmeister@andreas-haerter.de>
     * @param  string $str
     * @return boolean true when no byte outside 0x00-0x7F is found
     */
    function utf8_isASCII($str){
        return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $str
     * @return string
     */
    function utf8_strip($str){
        $ascii = '';
        $len = strlen($str);
        for($i=0; $i<$len; $i++){
            // square brackets: the {} offset syntax no longer parses on PHP 8
            if(ord($str[$i]) <128){
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * Only the lead byte / continuation byte structure is checked (including
     * the historic 5 and 6 byte forms); the decoded values are not validated.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://www.php.net/manual/en/function.utf8-encode.php
     * @param  string $Str
     * @return boolean
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue; # 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
            else return false; # Does not match any model

            for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * utf8_decode() converts characters that are not in ISO-8859-1
     * to '?', which, for the purpose of counting, is alright - It's
     * even faster than mb_strlen.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @see    utf8_decode()
     * @param  string $string
     * @return integer number of characters
     */
    function utf8_strlen($string){
        return strlen(utf8_decode($string));
    }
}

if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     * @param  string  $str
     * @param  integer $offset number of UTF-8 characters offset (from left)
     * @param  integer $length (optional) length in UTF-8 characters from offset
     * @return mixed string or false if failure
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            if( $length === null ){
                return mb_substr($str, $offset);
            }else{
                return mb_substr($str, $offset, $length);
            }
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
        $offset = (int)$offset;
        if (!is_null($length)) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = strlen(utf8_decode($str));        // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if (is_null($length)) {
            $length_pattern = '(.*)$';                  // the rest of the string
        } else {

            if (!isset($strlen)) $strlen = strlen(utf8_decode($str));    // see notes
            if ($offset > $strlen) return '';           // another trivial case

            if ($length > 0) {

                $length = min($strlen-$offset, $length);  // reduce any length that would go past the end of the string

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {

                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}

if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * Unlike substr_replace() the default $length is 0, i.e. the replacement
     * is inserted at $start without removing anything.
     *
     * Negative values follow substr_replace(): a negative $start counts from
     * the end of the string, a negative $length leaves that many characters
     * at the end of the string untouched.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     * @param  string  $string
     * @param  string  $replacement
     * @param  integer $start  character offset
     * @param  integer $length number of characters to replace
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        // only pay for the character count when a negative value needs resolving
        if($start < 0 || $length < 0){
            $strlen = utf8_strlen($string);
            if($start < 0)  $start  = max(0, $strlen + $start);
            if($length < 0) $length = max(0, $strlen - $start + $length);
        }

        $ret = '';
        if($start>0) $ret .= utf8_substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= utf8_substr($string, $start+$length);
        return $ret;
    }
}

if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     * @param  string $str
     * @param  string $charlist characters to strip, plain ltrim() is used when empty
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if($charlist == '') return ltrim($str);

        //quote charlist for use in a characterclass
        //('^' is quoted as well, a leading one would negate the class)
        $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist);

        return preg_replace('/^['.$charlist.']+/u','',$str);
    }
}

if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     * @param  string $str
     * @param  string $charlist characters to strip, plain rtrim() is used when empty
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if($charlist == '') return rtrim($str);

        //quote charlist for use in a characterclass
        //('^' is quoted as well, a leading one would negate the class)
        $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist);

        return preg_replace('/['.$charlist.']+$/u','',$str);
    }
}

if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     * @param  string $str
     * @param  string $charlist characters to strip, plain trim() is used when empty
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if($charlist == '') return trim($str);

        return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
    }
}

if(!function_exists('utf8_strtolower')){
    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available, the $UTF8_UPPER_TO_LOWER
     * lookup table otherwise
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     * @param  string $string
     * @return string
     */
    function utf8_strtolower($string){
        if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8');

        global $UTF8_UPPER_TO_LOWER;
        return strtr($string,$UTF8_UPPER_TO_LOWER);
    }
}

if(!function_exists('utf8_strtoupper')){
    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available, the $UTF8_LOWER_TO_UPPER
     * lookup table otherwise
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtolower()
     * @param  string $string
     * @return string
     */
    function utf8_strtoupper($string){
        if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');

        global $UTF8_LOWER_TO_UPPER;
        return strtr($string,$UTF8_LOWER_TO_UPPER);
    }
}

if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        switch ( utf8_strlen($str) ) {
            case 0:
                return '';
            case 1:
                return utf8_strtoupper($str);
            default:
                // the 'u' pattern fails on malformed UTF-8; hand the input
                // back unchanged instead of reading undefined matches
                if(!preg_match('/^(.{1})(.*)$/us', $str, $matches)) return $str;
                return utf8_strtoupper($matches[1]).$matches[2];
        }
    }
}

if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first char of each word uppercase
     * @see    http://www.php.net/ucwords
     */
    function utf8_ucwords($str) {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns
        // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
    }

    /**
     * Callback function for preg_replace_callback call in utf8_ucwords
     * You don't need to call this yourself
     *
     * @author Harry Fuecks
     * @param  array $matches matches corresponding to a single word
     * @return string with first char of the word in uppercase
     * @see    utf8_ucwords
     * @see    utf8_strtoupper
     */
    function utf8_ucwords_callback($matches) {
        $leadingws = $matches[2];
        $ucfirst = utf8_strtoupper($matches[3]);
        $ucword = utf8_substr_replace(ltrim($matches[0]),$ucfirst,0,1);
        return $leadingws . $ucword;
    }
}

if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters. Default is to deaccent both cases ($case = 0)
     *
     * The replacements come from the global $UTF8_LOWER_ACCENTS and
     * $UTF8_UPPER_ACCENTS tables.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string  $string
     * @param  integer $case   -1 lower only, 1 upper only, 0 both
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        if($case <= 0){
            global $UTF8_LOWER_ACCENTS;
            $string = strtr($string,$UTF8_LOWER_ACCENTS);
        }
        if($case >= 0){
            global $UTF8_UPPER_ACCENTS;
            $string = strtr($string,$UTF8_UPPER_ACCENTS);
        }
        return $string;
    }
}

if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * The replacements come from the global $UTF8_ROMANIZATION table.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @return string
     */
    function utf8_romanize($string){
        if(utf8_isASCII($string)) return $string; //nothing to do

        global $UTF8_ROMANIZATION;
        return strtr($string,$UTF8_ROMANIZATION);
    }
}

if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the array of
     * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string     The UTF8 string to strip of special chars
     * @param  string $repl       Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS;
        global $UTF8_SPECIAL_CHARS2;

        // the quoted character list is built once per request
        static $specials = null;
        if(is_null($specials)){
            #$specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/');
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
    }
}

if(!function_exists('utf8_strpos')){
    /**
     * This is an Unicode aware replacement for strpos
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     * @param  string  $haystack
     * @param  string  $needle
     * @param  integer $offset   character offset to start searching at
     * @return integer|false character position of the match, false if not found
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        $comp = 0;
        $length = null;
        $bytes = strlen($haystack);

        while (is_null($length) || $length < $offset) {
            // strpos() throws on PHP 8 when asked to start behind the end
            if ($offset + $comp > $bytes)
                return false;

            $pos = strpos($haystack, $needle, $offset + $comp);

            if ($pos === false)
                return false;

            // translate the byte position into a character position
            $length = utf8_strlen(substr($haystack, 0, $pos));

            // match lies before the requested character offset: search again
            // further right, compensating for the multi-byte characters seen
            if ($length < $offset)
                $comp = $pos - $length;
        }

        return $length;
    }
}

if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * ASCII is passed through, code points below 0x100 become decimal
     * entities, everything else hexadecimal entities.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://www.php.net/manual/en/function.utf8-decode.php
     * @param  string $str
     * @return string
     */
    function utf8_tohtml ($str) {
        $ret = '';
        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp < 0x80)
                $ret .= chr($cp);
            elseif ($cp < 0x100)
                $ret .= "&#$cp;";
            else
                $ret .= '&#x'.dechex($cp).';';
        }
        return $ret;
    }
}

if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * Converts any numeric &#..; entity to its codepoint. When $entities is
     * left at null only numeric entities are decoded; pass HTML_ENTITIES
     * and named entities are handled as well. Both kinds are decoded in a
     * single pass, so the output of one replacement is never decoded again.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @param  string  $str      UTF-8 encoded string
     * @param  boolean $entities Flag controlling decoding of named entities.
     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
     */
    function utf8_unhtml($str, $entities=null) {
        if (is_null($entities))
            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
                                         'utf8_decode_numeric', $str);

        // the entity table is only needed (and built once) for named entities
        static $decoder = null;
        if (is_null($decoder))
            $decoder = new utf8_entity_decoder();

        return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                                     array(&$decoder, 'decode'), $str);
    }
}

if(!function_exists('utf8_decode_numeric')){
    /**
     * Callback decoding a single numeric entity match
     *
     * @param  array $ent match with the hex marker in [2] and the digits in [3]
     * @return string UTF-8 encoded character
     * @see    utf8_unhtml
     */
    function utf8_decode_numeric($ent) {
        switch ($ent[2]) {
            case 'X':
            case 'x':
                $cp = hexdec($ent[3]);
                break;
            default:
                $cp = intval($ent[3]);
                break;
        }
        return unicode_to_utf8(array($cp));
    }
}

if(!class_exists('utf8_entity_decoder')){
    /**
     * Decodes numeric and named HTML entities, used by utf8_unhtml()
     */
    class utf8_entity_decoder {
        /** @var array maps an entity ('&amp;') to its UTF-8 character */
        var $table;

        /**
         * Build the entity => UTF-8 character lookup table
         */
        function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES);
            $table = array_flip($table);
            $this->table = array_map(array(&$this,'makeutf8'), $table);
        }

        /**
         * Old style constructor, kept as an alias for engines (and callers)
         * that do not know __construct()
         */
        function utf8_entity_decoder() {
            $this->__construct();
        }

        /**
         * Convert a character of the translation table to UTF-8
         *
         * Depending on the PHP version the table holds single ISO-8859-1
         * bytes or ready made UTF-8 sequences; the latter are longer than
         * one byte for everything outside ASCII and are passed through.
         *
         * @param  string $c
         * @return string
         */
        function makeutf8($c) {
            if (strlen($c) !== 1) return $c;
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * Callback decoding a single entity match
         *
         * @param  array $ent match as produced by the pattern in utf8_unhtml()
         * @return string the decoded character, unknown entities are returned as is
         */
        function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            } elseif (array_key_exists($ent[0],$this->table)) {
                return $this->table[$ent[0]];
            } else {
                return $ent[0];
            }
        }
    }
}

if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param  string  $str    UTF-8 encoded string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     * @see    unicode_to_utf8
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0;     // cached expected number of octets after the current octet
                         // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0;     // cached Unicode character
        $mBytes = 1;     // cached expected number of octets in the current sequence

        $out = array();

        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {

            // square brackets: the {} offset syntax no longer parses on PHP 8
            $in = ord($str[$i]);

            if ( $mState == 0) {

                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in))) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & ($in))) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & ($in))) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & ($in))) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & ($in))) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & ($in))) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Illegal sequence identifier '.
                                'in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );
                    return false;

                }

            } else {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in))) {

                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState) {

                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                        'utf8_to_unicode: Illegal sequence or codepoint '.
                                            'in UTF-8 at byte '.$i,
                                        E_USER_WARNING
                                    );

                                return false;
                            }

                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Incomplete multi-octet '.
                            '   sequence in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );

                    return false;
                }
            }
        }
        return $out;
    }
}

if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING.
     * Without $strict such values are silently skipped.
     *
     * The result is built by plain string concatenation, so returning early
     * in strict mode leaves no output buffer behind.
     *
     * @param  array   $arr    unicode code points representing a string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed UTF-8 string or false if array contains invalid code points
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    utf8_to_unicode
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';
        $result = '';

        foreach (array_keys($arr) as $k) {
            $cp = $arr[$k];

            if ( ($cp < 0) || ($cp > 0x10ffff) ) {
                # out of range, negative values included

                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Codepoint out of Unicode range '.
                            'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                        );
                    return false;
                }

            } else if ($cp <= 0x007f) {
                # ASCII range (including control chars)

                $result .= chr($cp);

            } else if ($cp <= 0x07ff) {
                # 2 byte sequence

                $result .= chr(0xc0 | ($cp >> 6));
                $result .= chr(0x80 | ($cp & 0x003f));

            } else if($cp == 0xFEFF) {
                # Byte order mark (skip)

                // nop -- zap the BOM

            } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {
                # Test for illegal surrogates

                // found a surrogate
                if($strict){
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                            'at index: '.$k.', value: '.$cp,
                        E_USER_WARNING
                        );
                    return false;
                }

            } else if ($cp <= 0xffff) {
                # 3 byte sequence

                $result .= chr(0xe0 | ($cp >> 12));
                $result .= chr(0x80 | (($cp >> 6) & 0x003f));
                $result .= chr(0x80 | ($cp & 0x003f));

            } else {
                # 4 byte sequence

                $result .= chr(0xf0 | ($cp >> 18));
                $result .= chr(0x80 | (($cp >> 12) & 0x3f));
                $result .= chr(0x80 | (($cp >> 6) & 0x3f));
                $result .= chr(0x80 | ($cp & 0x3f));
            }
        }

        return $result;
    }
}

if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_string when available. The fallback encodes code points above
     * 0xFFFF as surrogate pairs.
     *
     * @param  string  $str UTF-8 encoded string (taken by reference, not modified)
     * @param  boolean $bom prepend a big endian byte order mark?
     * @return string  UTF-16BE encoded string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if($cp > 0xFFFF && $cp <= 0x10FFFF){
                // astral plane: split into high and low surrogate
                $cp -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            }else{
                $out .= pack('n',$cp);
            }
        }
        return $out;
    }
}

if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * Surrogate pairs are combined into a single code point; unpaired
     * surrogates are passed on to unicode_to_utf8(), which skips them.
     *
     * @param  string $str UTF-16BE encoded string (taken by reference, not modified)
     * @return string UTF-8 encoded string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if(!is_array($units)) return '';

        $units = array_values($units);   // unpack() numbers its result from 1
        $count = count($units);
        $uni   = array();

        for($i = 0; $i < $count; $i++){
            $unit = $units[$i];
            if($unit >= 0xD800 && $unit <= 0xDBFF && ($i + 1) < $count &&
               $units[$i+1] >= 0xDC00 && $units[$i+1] <= 0xDFFF){
                // high surrogate followed by low surrogate
                $uni[] = 0x10000 + (($unit - 0xD800) << 10) + ($units[$i+1] - 0xDC00);
                $i++;
            }else{
                $uni[] = $unit;
            }
        }
        return unicode_to_utf8($uni);
    }
}

if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    http://www.w3.org/International/questions/qa-forms-utf-8
     * @param  string $str     to search
     * @param  string $replace to replace bad bytes with (defaults to '', which
     *                         removes them) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
         '([\x00-\x7F]'.                          # ASCII (including control chars)
         '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
         '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
         '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
         '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
         '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
         '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
         '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
         '|(.{1}))';                              # invalid byte

        $result = '';
        $pos    = 0;
        $len    = strlen($str);

        // every byte is matched by one of the alternatives, so each match
        // starts exactly at $pos and the scan walks the string once
        while ($pos < $len && preg_match('/'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
            if ( !isset($matches[2])) {
                $result .= $matches[0];    // valid sequence
            } else {
                $result .= $replace;       // stray byte caught by the last alternative
            }
            $pos += strlen($matches[0]);
        }
        return $result;
    }
}

if(!function_exists('utf8_correctIdx')){
    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * @param $str   string   utf8 character string
     * @param $i     int      byte index into $str
     * @param $next  bool     direction to search for boundary,
     *                           false = up (current character)
     *                           true = down (next character)
     *
     * @return int            byte index into $str now pointing to a utf8 character boundary
     *
     * @author       chris smith <chris@jalakai.co.uk>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $limit = strlen($str);
        if ($i>=$limit) return $limit;

        // step over continuation bytes (10bbbbbb) in the requested direction
        if ($next) {
            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
        }

        return $i;
    }
}

// only needed if no mb_string available
if(!UTF8_MBSTRING){
    /**
     * UTF-8 Case lookup table
     *
     * This lookuptable maps the lower case letters to their corresponding
     * upper case letter in UTF-8
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_LOWER_TO_UPPER;
    if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
            "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q",
            "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G",
            "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ",
            "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ",
"ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 975 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 976 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 977 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 978 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 979 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 980 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 981 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 982 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 983 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 984 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 985 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 986 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 987 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 988 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 989 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 990 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 991 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 992 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 993 "ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 994 "ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 995 
"ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 996 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 997 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 998 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 999 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 1000 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 1001 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1002 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1003 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1004 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1005 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1006 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1007 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1008 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1009 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1010 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1011 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1012 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1013 "ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1014 "ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1015 
"ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1016 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1017 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1018 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1019 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1020 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1021 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1022 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1023 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1024 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1025 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1026 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1027 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1028 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1029 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1030 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1031 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1032 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1033 "ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1034 "á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 1035 
"s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J",
        "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A"
    );

    /**
     * UTF-8 Case lookup table
     *
     * This lookup table maps upper case letters to their corresponding
     * lower case letter in UTF-8.
     *
     * The table is only assigned while the global is still empty, so a table
     * that was set up earlier is left untouched.
     *
     * Several keys appear more than once. PHP keeps the value of the LAST
     * occurrence of a duplicate key in an array literal, so row order matters:
     *  - the variant forms (ϵ ϲ ϱ ϰ ϖ ϕ ϑ ϐ ẛ ſ ı) are listed before the
     *    regular letters and are therefore overridden by them; do not move
     *    the regular Greek rows or the trailing ASCII rows in front of them.
     *  - Σ must map to σ (not to the word-final form ς) and Μ must map to the
     *    Greek μ (not to the micro sign µ); the two entries that used to
     *    override these mappings have been removed.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_UPPER_TO_LOWER;
    if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array (
        "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q",
        "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g",
        "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ",
        "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ",
        "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ",
        "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ",
        "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ",
        "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ",
        "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ",
        "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ",
        "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ",
        "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ",
        "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ",
        "Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố",
        "Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ",
        "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ",
        "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ",
        "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ",
        "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ",
        "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ",
        "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ",
        "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ",
        "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ",
        "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ",
        "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս",
        "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ",
        "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ",
        "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ",
        "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ",
        "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ",
        "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ",
        "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ",
        "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ",
        "Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ",
        "Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ",
        "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ",
        "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ",
        "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ",
        "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п",
        "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е",
        "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ",
        "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ",
        "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ",
        // "Σ"=>"ς" intentionally not listed here: it would override "Σ"=>"σ"
        "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Ρ"=>"ρ","Π"=>"π",
        "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ",
        "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ",
        "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ",
        "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ",
        "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș",
        "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ",
        "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ",
        "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ",
        "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ",
        "Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ",
        "Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž",
        "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ",
        "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ",
        "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ",
        "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī",
        "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė",
        "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă",
        "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö",
        "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì",
        "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â",
        // "Μ"=>"µ" (micro sign) intentionally not listed here: it would override "Μ"=>"μ"
        "Á"=>"á","À"=>"à","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t",
        // these ASCII rows must stay last so that S and I end up as s and i (not ſ and ı)
        "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j",
        "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a"
    );
}; // end of case lookup tables

/**
 * UTF-8 lookup table for lower case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. These are lower case letters only.
 *
 * Keys are single accented lower case characters, values are their plain
 * ASCII replacements. Most values are one letter; a few expand to two
 * letters (ß => ss, ä => ae, ö => oe, ü => ue, æ => ae, þ => th, ð => dh).
 *
 * The table is only assigned while the global is still empty, so a table
 * that was set up earlier is left untouched.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see utf8_deaccent()
 */
global $UTF8_LOWER_ACCENTS;
if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array(
  'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
  'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
  'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
  'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o',
  'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
  'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
  'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
  'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
  'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
  'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
  'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
  'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
  'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
  'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e',
);

/**
 * UTF-8 lookup table for upper case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. These are upper case letters only.
 *
 * Keys are single accented upper case characters, values are their plain
 * ASCII replacements. Most values are one letter; a few expand to a capital
 * followed by a lower case letter (Ä => Ae, Ö => Oe, Ü => Ue, Æ => Ae,
 * Þ => Th, Ð => Dh).
 *
 * The table is only assigned while the global is still empty, so a table
 * that was set up earlier is left untouched.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see utf8_deaccent()
 */
global $UTF8_UPPER_ACCENTS;
if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array(
  'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
  'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
  'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
  'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O',
  'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
  'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
  'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
  'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
  'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
  'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
  'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
  'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
  'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
  'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E',
);

/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets - it's not a complete list of non-alphanum
 * characters in UTF-8. It's not perfect but should match most cases of special
 * chars.
 *
 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
1184 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a 1185 * 1186 * @author Andreas Gohr <andi@splitbrain.org> 1187 * @see utf8_stripspecials() 1188 */ 1189global $UTF8_SPECIAL_CHARS; 1190if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( 1191 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 1192 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 1193 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 1194 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 1195 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 1196 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 1197 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 1198 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 1199 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 1200 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 1201 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 1202 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 1203 0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 1204 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 1205 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1206 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1207 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1208 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1209 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1210 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1211 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 
0x2200, 0x2202, 1212 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1213 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1214 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1215 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1216 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1217 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1218 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1219 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1220 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1221 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1222 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1223 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1224 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1225 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1226 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1227 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1228 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1229 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1230 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1231 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1232 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1233 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1234 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1235 0x27b4, 0x27b5, 0x27b6, 
0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1236 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1237 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1238 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1239 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1240 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1241 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1242 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1243 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1244 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1245 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1246 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1247 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1248 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1249 0xffeb, 0xffec, 0xffed, 0xffee, 1250 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1251 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1252 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1253 0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1254 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1255); 1256 1257// utf8 version of above data 1258global $UTF8_SPECIAL_CHARS2; 1259if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1260 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ �'. 1261 '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. 1262 '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1263 '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. 1264 '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. 1265 '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. 1266 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. 1267 '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. 1268 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. 
1269 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1270 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1271 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1272 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1273 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1274 '➷➸➹➺➻➼➽➾'. 1275 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1276 '�'. 1277 '�ﹼﹽ'. 1278 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1279 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1280 ''. 1281 ' '; 1282 1283/** 1284 * Romanization lookup table 1285 * 1286 * This lookup tables provides a way to transform strings written in a language 1287 * different from the ones based upon latin letters into plain ASCII. 1288 * 1289 * Please note: this is not a scientific transliteration table. It only works 1290 * oneway from nonlatin to ASCII and it works by simple character replacement 1291 * only. Specialities of each language are not supported. 1292 * 1293 * @author Andreas Gohr <andi@splitbrain.org> 1294 * @author Vitaly Blokhin <vitinfo@vitn.com> 1295 * @link http://www.uconv.com/translit.htm 1296 * @author Bisqwit <bisqwit@iki.fi> 1297 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1298 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1299 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1300 * @link http://www.btranslations.com/resources/romanization/korean.asp 1301 * @author Arthit Suriyawongkul <arthit@gmail.com> 1302 * @author Denis Scheither <amorphis@uni-bremen.de> 1303 */ 1304global $UTF8_ROMANIZATION; 1305if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1306 // scandinavian - differs from what we do in deaccent 1307 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1308 1309 //russian cyrillic 1310 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1311 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1312 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1313 'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1314 
'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1315 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1316 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1317 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1318 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1319 // Ukrainian cyrillic 1320 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1321 // Georgian 1322 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1323 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1324 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1325 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1326 'ჰ'=>'xh', 1327 //Sanskrit 1328 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1329 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1330 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1331 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1332 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1333 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1334 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1335 //Hebrew 1336 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1337 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1338 'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1339 'ש'=>'sh','ת'=>'t', 1340 //Arabic 1341 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1342 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1343 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1344 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1345 1346 // Japanese characters (last update: 2008-05-09) 1347 
1348 // Japanese hiragana 1349 1350 // 3 character syllables, っ doubles the consonant after 1351 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1352 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1353 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1354 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1355 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1356 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1357 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1358 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1359 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1360 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1361 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1362 1363 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1364 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1365 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1366 1367 // 2 character syllables - normal 1368 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1369 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1370 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1371 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1372 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1373 'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1374 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1375 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1376 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1377 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1378 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1379 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1380 'うぇ'=>'we','うぃ'=>'wi', 
1381 'いぇ'=>'ye', 1382 1383 // 2 character syllables, っ doubles the consonant after 1384 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1385 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1386 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1387 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1388 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1389 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1390 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1391 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1392 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1393 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1394 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1395 1396 // 1 character syllabels 1397 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1398 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1399 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1400 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1401 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1402 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1403 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1404 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1405 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1406 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1407 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1408 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1409 'わ'=>'wa','を'=>'wo', 1410 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1411 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1412 // old characters 1413 'ゑ'=>'we','ゐ'=>'wi', 1414 1415 // convert what's left (probably only kicks in when something's missing above) 1416 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1417 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1418 1419 // never seen one of those (disabled for the moment) 1420 // 
'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1421 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1422 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1423 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1424 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1425 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1426 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1427 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1428 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1429 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1430 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1431 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1432 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1433 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1434 1435 // 'spare' characters from other romanization systems 1436 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1437 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1438 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1439 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1440 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1441 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1442 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1443 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1444 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1445 //'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1446 1447 1448 // Japanese katakana 1449 1450 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1451 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1452 
'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1453 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1454 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1455 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1456 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1457 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1458 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1459 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1460 'ッティー'=>'ttii', 1461 'ッヂィー'=>'ddii', 1462 1463 // 3 character syllables - doubled vowels 1464 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1465 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1466 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1467 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1468 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1469 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1470 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1471 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1472 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1473 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1474 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1475 'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1476 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1477 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1478 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1479 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1480 
'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1481 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1482 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1483 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1484 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1485 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1486 'ウェー'=>'wee','ウィー'=>'wii', 1487 'イェー'=>'yee', 1488 'ティー'=>'tii', 1489 'ヂィー'=>'dii', 1490 1491 // 3 character syllables - doubled consonants 1492 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1493 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1494 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1495 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1496 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1497 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1498 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1499 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1500 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1501 'ッティ'=>'tti', 1502 'ッヂィ'=>'ddi', 1503 1504 // 3 character syllables - doubled vowel and consonants 1505 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1506 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1507 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1508 'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1509 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1510 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1511 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1512 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1513 
'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1514 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1515 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1516 1517 // 2 character syllables - normal 1518 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1519 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1520 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1521 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1522 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1523 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1524 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1525 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1526 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1527 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1528 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1529 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1530 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1531 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1532 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1533 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1534 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1535 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1536 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1537 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1538 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1539 'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1540 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1541 'ウェ'=>'we','ウィ'=>'wi', 1542 'イェ'=>'ye', 1543 'ティ'=>'ti', 1544 'ヂィ'=>'di', 1545 1546 // 2 character syllables - doubled vocal 1547 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1548 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 
1549 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1550 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1551 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1552 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1553 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1554 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1555 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1556 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1557 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1558 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1559 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1560 'ワー'=>'waa','ヲー'=>'woo', 1561 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1562 'ヵー'=>'kaa','ヶー'=>'kee', 1563 // old characters 1564 'ヱー'=>'wee','ヰー'=>'wii', 1565 1566 // seperate katakana 'n' 1567 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1568 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1569 1570 // 2 character syllables - doubled consonants 1571 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1572 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1573 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1574 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1575 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1576 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1577 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1578 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1579 'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1580 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1581 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1582 1583 // 1 character syllables 1584 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1585 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1586 
'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1587 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1588 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1589 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1590 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1591 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1592 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1593 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1594 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1595 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1596 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1597 'ワ'=>'wa','ヲ'=>'wo', 1598 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1599 'ヵ'=>'ka','ヶ'=>'ke', 1600 // old characters 1601 'ヱ'=>'we','ヰ'=>'wi', 1602 1603 // convert what's left (probably only kicks in when something's missing above) 1604 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1605 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1606 1607 // special characters 1608 '・'=>'_','、'=>'_', 1609 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1610 1611 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1612 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1613 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1614 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1615 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1616 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1617 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1618 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1619 // 'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1620 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1621 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1622 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1623 1624 // "Greeklish" 1625 
'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1626 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1627 1628 // Thai 1629 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1630 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1631 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1632 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1633 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1634 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1635 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1636 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1637 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1638 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1639 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1640 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1641 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1642 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1643 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1644 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1645 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1646 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1647 1648 // Korean 1649 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1650 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1651 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1652 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1653 'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1654 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1655); 1656 1657 1658