<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * Check for mb_string support
 *
 * UTF8_MBSTRING becomes 1 when mb_substr() exists and UTF8_NOMBSTRING has
 * not been defined, 0 otherwise. A value defined earlier is left untouched.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

// let the mb_* functions operate on UTF-8 by default
if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }

if(!function_exists('utf8_encodeFN')){
    /**
     * URL-Encode a filename to allow unicode characters
     *
     * Slashes are not encoded
     *
     * When the second parameter is true the string will
     * be encoded only if non ASCII characters are detected -
     * This makes it safe to run it multiple times on the
     * same string (default is true)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    urlencode
     * @param  string $file filename to encode
     * @param  bool   $safe skip encoding when $file only consists of
     *                      a-z, A-Z, 0-9 and the characters / _ - . %
     * @return string
     */
    function utf8_encodeFN($file,$safe=true){
        if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){
            return $file;
        }
        $file = urlencode($file);
        // directory separators have to stay readable
        $file = str_replace('%2F','/',$file);
        return $file;
    }
}

if(!function_exists('utf8_decodeFN')){
    /**
     * URL-Decode a filename
     *
     * This is just a wrapper around urldecode
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    urldecode
     * @param  string $file
     * @return string
     */
    function utf8_decodeFN($file){
        $file = urldecode($file);
        return $file;
    }
}

if(!function_exists('utf8_isASCII')){
    /**
     * Checks if a string contains 7bit ASCII only
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $str
     * @return bool   false as soon as a byte above 127 is found
     */
    function utf8_isASCII($str){
        $len = strlen($str);
        for($i=0; $i<$len; $i++){
            // square brackets: the {} offset syntax no longer parses on PHP 8
            if(ord($str[$i]) >127) return false;
        }
        return true;
    }
}

if(!function_exists('utf8_strip')){
    /**
     * Strips all highbyte chars
     *
     * Returns a pure ASCII7 string
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $str
     * @return string $str without any byte above 127
     */
    function utf8_strip($str){
        $ascii = '';
        $len   = strlen($str);
        for($i=0; $i<$len; $i++){
            if(ord($str[$i]) <128){
                $ascii .= $str[$i];
            }
        }
        return $ascii;
    }
}

if(!function_exists('utf8_check')){
    /**
     * Tries to detect if a string is in Unicode encoding
     *
     * Only the structure is checked: every lead byte has to be followed by
     * the announced number of continuation bytes. The historic 5 and 6 byte
     * forms are accepted; overlong forms and surrogates are not rejected.
     *
     * @author <bmorel@ssi.fr>
     * @link   http://www.php.net/manual/en/function.utf8-encode.php
     * @param  string $Str
     * @return bool
     */
    function utf8_check($Str) {
        $len = strlen($Str);
        for ($i=0; $i<$len; $i++) {
            $b = ord($Str[$i]);
            if ($b < 0x80) continue; # 0bbbbbbb
            elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
            elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
            elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
            elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
            elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
            else return false; # Does not match any model

            for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
                if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                    return false;
            }
        }
        return true;
    }
}

if(!function_exists('utf8_strlen')){
    /**
     * Unicode aware replacement for strlen()
     *
     * utf8_decode() converts characters that are not in ISO-8859-1
     * to '?', which, for the purpose of counting, is alright - It's
     * even faster than mb_strlen.
*
     * When utf8_decode() is not available the length is taken from
     * mb_strlen() or, as a last resort, by counting all bytes that are not
     * UTF-8 continuation bytes.
     *
     * @author <chernyshevsky at hotmail dot com>
     * @see    strlen()
     * @see    utf8_decode()
     * @param  string $string
     * @return int    number of characters
     */
    function utf8_strlen($string){
        if(function_exists('utf8_decode')){
            return strlen(utf8_decode($string));
        }
        if(UTF8_MBSTRING){
            return mb_strlen($string,'UTF-8');
        }
        // every character owns exactly one byte that is not of the form 10xxxxxx
        return strlen(preg_replace('/[\x80-\xBF]/','',$string));
    }
}

if(!function_exists('utf8_substr')){
    /**
     * UTF-8 aware alternative to substr
     *
     * Return part of a string given character offset (and optionally length)
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @author Chris Smith <chris@jalakai.co.uk>
     * @param  string  $str
     * @param  integer $offset number of UTF-8 characters offset (from left)
     * @param  integer $length (optional) length in UTF-8 characters from offset
     * @return string  the selected part, '' when nothing can be selected
     */
    function utf8_substr($str, $offset, $length = null) {
        if(UTF8_MBSTRING){
            // an explicit null length must not be handed to older mb_substr()
            if( $length === null ){
                return mb_substr($str, $offset);
            }else{
                return mb_substr($str, $offset, $length);
            }
        }

        /*
         * Notes:
         *
         * no mb string support, so we'll use pcre regex's with 'u' flag
         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
         *
         * substr documentation states false can be returned in some cases (e.g. offset > string length)
         * mb_substr never returns false, it will return an empty string instead.
         *
         * calculating the number of characters in the string is a relatively expensive operation, so
         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
         */

        // cast parameters to appropriate types to avoid multiple notices/warnings
        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
        $offset = (int)$offset;
        if (!is_null($length)) $length = (int)$length;

        // handle trivial cases
        if ($length === 0) return '';
        if ($offset < 0 && $length < 0 && $length < $offset) return '';

        $offset_pattern = '';
        $length_pattern = '';

        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
        if ($offset < 0) {
            $strlen = utf8_strlen($str);        // see notes
            $offset = $strlen + $offset;
            if ($offset < 0) $offset = 0;
        }

        // establish a pattern for offset, a non-captured group equal in length to offset
        if ($offset > 0) {
            $Ox = (int)($offset/65535);
            $Oy = $offset%65535;

            if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
            $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
        } else {
            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
        }

        // establish a pattern for length
        if (is_null($length)) {
            $length_pattern = '(.*)$';                  // the rest of the string
        } else {

            if (!isset($strlen)) $strlen = utf8_strlen($str);    // see notes
            if ($offset > $strlen) return '';           // another trivial case

            if ($length > 0) {

                $length = min($strlen-$offset, $length);  // reduce any length that would go passed the end of the string

                $Lx = (int)($length/65535);
                $Ly = $length%65535;

                // +ve length requires ... a captured group of length characters
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

            } else if ($length < 0) {

                if ($length < ($offset - $strlen)) return '';

                $Lx = (int)((-$length)/65535);
                $Ly = (-$length)%65535;

                // -ve length requires ... capture everything except a group of -length characters
                //                         anchored at the tail-end of the string
                if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
                $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
            }
        }

        if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
        return $match[1];
    }
}

if(!function_exists('utf8_substr_replace')){
    /**
     * Unicode aware replacement for substr_replace()
     *
     * Negative values for $start and $length count from the end of the
     * string, like they do in substr_replace(). Note that the default
     * $length of 0 inserts $replacement without removing anything.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    substr_replace()
     * @param  string $string
     * @param  string $replacement
     * @param  int    $start  character position where the replacement starts
     * @param  int    $length number of characters to replace
     * @return string
     */
    function utf8_substr_replace($string, $replacement, $start , $length=0 ){
        $start  = (int)$start;
        $length = (int)$length;

        // translate positions relative to the end into absolute ones; the
        // character count is only needed (and only computed) for this case
        if($start < 0 || $length < 0){
            $strlen = utf8_strlen($string);
            if($start < 0) $start = max(0, $strlen + $start);
            if($start > $strlen) $start = $strlen;
            if($length < 0) $length = max(0, $strlen - $start + $length);
        }

        $ret = '';
        if($start>0) $ret .= utf8_substr($string, 0, $start);
        $ret .= $replacement;
        $ret .= utf8_substr($string, $start+$length);
        return $ret;
    }
}

if(!function_exists('utf8_ltrim')){
    /**
     * Unicode aware replacement for ltrim()
     *
     * Without a charlist the call is passed on to ltrim(). The charlist is
     * a plain list of characters, ranges are not supported.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    ltrim()
     * @param  string $str
     * @param  string $charlist characters to strip
     * @return string
     */
    function utf8_ltrim($str,$charlist=''){
        if($charlist == '') return ltrim($str);

        //quote charlist for use in a characterclass ('^' would negate the class)
        $charlist = preg_replace('!([\\\\\\-\\]\\[/\\^])!','\\\${1}',$charlist);

        return preg_replace('/^['.$charlist.']+/u','',$str);
    }
}

if(!function_exists('utf8_rtrim')){
    /**
     * Unicode aware replacement for rtrim()
     *
     * Without a charlist the call is passed on to rtrim(). The charlist is
     * a plain list of characters, ranges are not supported.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    rtrim()
     * @param  string $str
     * @param  string $charlist characters to strip
     * @return string
     */
    function utf8_rtrim($str,$charlist=''){
        if($charlist == '') return rtrim($str);

        //quote charlist for use in a characterclass ('^' would negate the class)
        $charlist = preg_replace('!([\\\\\\-\\]\\[/\\^])!','\\\${1}',$charlist);

        return preg_replace('/['.$charlist.']+$/u','',$str);
    }
}

if(!function_exists('utf8_trim')){
    /**
     * Unicode aware replacement for trim()
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @see    trim()
     * @param  string $str
     * @param  string $charlist characters to strip from both ends
     * @return string
     */
    function utf8_trim($str,$charlist='') {
        if($charlist == '') return trim($str);

        return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
    }
}

if(!function_exists('utf8_strtolower')){
    /**
     * This is a unicode aware replacement for strtolower()
     *
     * Uses mb_string extension if available, the $UTF8_UPPER_TO_LOWER
     * lookup table otherwise
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtolower()
     * @see    utf8_strtoupper()
     * @param  string $string
     * @return string
     */
    function utf8_strtolower($string){
        if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8');

        global $UTF8_UPPER_TO_LOWER;
        return strtr($string,$UTF8_UPPER_TO_LOWER);
    }
}

if(!function_exists('utf8_strtoupper')){
    /**
     * This is a unicode aware replacement for strtoupper()
     *
     * Uses mb_string extension if available, the $UTF8_LOWER_TO_UPPER
     * lookup table otherwise
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strtoupper()
     * @see    utf8_strtolower()
     * @param  string $string
     * @return string
     */
    function utf8_strtoupper($string){
        if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');

        global $UTF8_LOWER_TO_UPPER;
        return strtr($string,$UTF8_LOWER_TO_UPPER);
    }
}

if(!function_exists('utf8_ucfirst')){
    /**
     * UTF-8 aware alternative to ucfirst
     * Make a string's first character uppercase
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first character as upper case (if applicable)
     */
    function utf8_ucfirst($str){
        switch ( utf8_strlen($str) ) {
            case 0:
                return '';
            case 1:
                return utf8_strtoupper($str);
            default:
                // the pattern does not match strings that are not valid UTF-8
                if(!preg_match('/^(.{1})(.*)$/us', $str, $matches)){
                    return $str;
                }
                return utf8_strtoupper($matches[1]).$matches[2];
        }
    }
}

if(!function_exists('utf8_ucwords')){
    /**
     * UTF-8 aware alternative to ucwords
     * Uppercase the first character of each word in a string
     *
     * @author Harry Fuecks
     * @param  string $str
     * @return string with first char of each word uppercase
     * @see    http://www.php.net/ucwords
     */
    function utf8_ucwords($str) {
        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
        // form feeds, horizontal tabs, vertical tabs, linefeeds, carriage returns and spaces
        // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords
        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';

        return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str);
    }

    /**
     * Callback function for preg_replace_callback call in utf8_ucwords
     * You don't need to call this yourself
     *
     * @author Harry Fuecks
     * @param  array $matches matches corresponding to a single word:
     *                        [2] leading whitespace, [3] first character
     * @return string with first char of the word in uppercase
     * @see    utf8_ucwords
     * @see    utf8_strtoupper
     */
    function utf8_ucwords_callback($matches) {
        $leadingws = $matches[2];
        $ucfirst   = utf8_strtoupper($matches[3]);
        // cut by byte length instead of ltrim(): ltrim() leaves form feeds
        // in place although the pattern counts them as whitespace
        $rest = substr($matches[0], strlen($leadingws) + strlen($matches[3]));
        return $leadingws . $ucfirst . $rest;
    }
}

if(!function_exists('utf8_deaccent')){
    /**
     * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
     *
     * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
     * letters.
* Default is to deaccent both cases ($case = 0)
     *
     * The replacements are taken from the global lookup tables
     * $UTF8_LOWER_ACCENTS and $UTF8_UPPER_ACCENTS.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @param  int    $case   <0 lower case only, >0 upper case only, 0 both
     * @return string
     */
    function utf8_deaccent($string,$case=0){
        $handleLower = ($case <= 0);
        $handleUpper = ($case >= 0);

        if($handleLower){
            global $UTF8_LOWER_ACCENTS;
            $string = strtr($string,$UTF8_LOWER_ACCENTS);
        }

        if($handleUpper){
            global $UTF8_UPPER_ACCENTS;
            $string = strtr($string,$UTF8_UPPER_ACCENTS);
        }

        return $string;
    }
}

if(!function_exists('utf8_romanize')){
    /**
     * Romanize a non-latin string
     *
     * Pure ASCII input is returned untouched, everything else is run
     * through the global $UTF8_ROMANIZATION lookup table.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string
     * @return string
     */
    function utf8_romanize($string){
        if(!utf8_isASCII($string)){
            global $UTF8_ROMANIZATION;
            return strtr($string,$UTF8_ROMANIZATION);
        }

        //nothing to do
        return $string;
    }
}

if(!function_exists('utf8_stripspecials')){
    /**
     * Removes special characters (nonalphanumeric) from a UTF-8 string
     *
     * This function adds the controlchars 0x00 to 0x19 to the array of
     * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
     *
     * The quoted form of $UTF8_SPECIAL_CHARS2 is computed on the first call
     * and cached for all following ones.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param  string $string     The UTF8 string to strip of special chars
     * @param  string $repl       Replace special with this string
     * @param  string $additional Additional chars to strip (used in regexp char class)
     * @return string
     */
    function utf8_stripspecials($string,$repl='',$additional=''){
        global $UTF8_SPECIAL_CHARS;
        global $UTF8_SPECIAL_CHARS2;

        static $specials = null;
        if($specials === null){
            $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
        }

        $pattern = '/['.$additional.'\x00-\x19'.$specials.']/u';
        return preg_replace($pattern,$repl,$string);
    }
}

if(!function_exists('utf8_strpos')){
    /**
     * This is an Unicode aware replacement for strpos
     *
     * The search itself is done on bytes; the byte position of a hit is
     * converted to a character position and the search is repeated further
     * right as long as the hit lies in front of the requested offset.
     *
     * @author Leo Feyer <leo@typolight.org>
     * @see    strpos()
     * @param  string  $haystack
     * @param  string  $needle
     * @param  integer $offset   character position to start searching from
     * @return integer|false     character position of the hit, false if there is none
     */
    function utf8_strpos($haystack, $needle, $offset=0){
        // number of bytes by which the byte position is ahead of the
        // character position
        $byteSurplus = 0;

        do {
            $bytePos = strpos($haystack, $needle, $offset + $byteSurplus);
            if ($bytePos === false) {
                return false;
            }

            $charPos     = utf8_strlen(substr($haystack, 0, $bytePos));
            $byteSurplus = $bytePos - $charPos;
        } while ($charPos < $offset);

        return $charPos;
    }
}

if(!function_exists('utf8_tohtml')){
    /**
     * Encodes UTF-8 characters to HTML entities
     *
     * ASCII is copied as is, code points below 0x100 become decimal
     * entities and everything else hexadecimal entities.
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @author <vpribish at shopping dot com>
     * @link   http://www.php.net/manual/en/function.utf8-decode.php
     * @param  string $str
     * @return string
     */
    function utf8_tohtml ($str) {
        $html = '';
        foreach (utf8_to_unicode($str) as $cp) {
            if ($cp >= 0x100) {
                $html .= '&#x'.dechex($cp).';';
            } elseif ($cp >= 0x80) {
                $html .= "&#$cp;";
            } else {
                $html .= chr($cp);
            }
        }
        return $html;
    }
}

if(!function_exists('utf8_unhtml')){
    /**
     * Decodes HTML entities to UTF-8 characters
     *
     * Convert any &#..; entity to a codepoint,
     * The entities flag defaults to only decoding numeric entities.
     * Pass HTML_ENTITIES and named entities, including & < etc.
     * are handled as well. Avoids the problem that would occur if you
     * had to decode "&#38;&amp;#38;"
     *
     * unhtmlspecialchars(utf8_unhtml($s)) -> "&&"
     * utf8_unhtml(unhtmlspecialchars($s)) -> "&&#38;"
     * what it should be                   -> "&&#38;"
     *
     * @author Tom N Harris <tnharris@whoopdedo.org>
     * @param  string  $str      UTF-8 encoded string
     * @param  boolean $entities Flag controlling decoding of named entities.
     * @return UTF-8 encoded string with numeric (and named) entities replaced.
*/
    function utf8_unhtml($str, $entities=null) {
        static $decoder = null;

        // numeric entities only
        if (is_null($entities)) {
            return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m',
                                         'utf8_decode_numeric', $str);
        }

        // the entity table is only built once named entities are requested
        if (is_null($decoder)) {
            $decoder = new utf8_entity_decoder();
        }
        return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m',
                                     array($decoder, 'decode'), $str);
    }
}

if(!function_exists('utf8_decode_numeric')){
    /**
     * Callback for utf8_unhtml: decodes a single numeric entity
     *
     * Entities whose digits are not valid for their notation (e.g. "&#abc;"
     * or "&#xZZ;") are returned unchanged instead of being decoded as
     * code point 0.
     *
     * @param  array $ent regexp match: [0] the whole entity, [2] 'x' or 'X'
     *                    for hexadecimal notation, [3] the digits
     * @return string the UTF-8 encoded character or the untouched entity
     */
    function utf8_decode_numeric($ent) {
        switch ($ent[2]) {
            case 'X':
            case 'x':
                if (!preg_match('/^[0-9A-Fa-f]+$/D', $ent[3])) return $ent[0];
                $cp = hexdec($ent[3]);
                break;
            default:
                if (!preg_match('/^[0-9]+$/D', $ent[3])) return $ent[0];
                $cp = intval($ent[3]);
                break;
        }
        return unicode_to_utf8(array($cp));
    }
}

if(!class_exists('utf8_entity_decoder')){
    /**
     * Decodes named and numeric HTML entities for utf8_unhtml
     */
    class utf8_entity_decoder {
        // maps entities ('&auml;') to their UTF-8 encoded character
        var $table;

        /**
         * PHP 4 style constructor
         *
         * PHP 8 no longer treats this method as constructor, it is kept
         * for old interpreters and explicit callers only.
         */
        function utf8_entity_decoder() {
            $this->__construct();
        }

        /**
         * Builds the entity lookup table
         */
        function __construct() {
            $table = get_html_translation_table(HTML_ENTITIES);
            $table = array_flip($table);
            $this->table = array_map(array($this,'makeutf8'), $table);
        }

        /**
         * Converts one character of the translation table to UTF-8
         *
         * @param  string $c character as found in the translation table
         * @return string
         */
        function makeutf8($c) {
            // Newer PHP versions deliver the table UTF-8 encoded already;
            // ord() would only see the first byte of such a character.
            if (strlen($c) !== 1) return $c;
            return unicode_to_utf8(array(ord($c)));
        }

        /**
         * Callback for utf8_unhtml: decodes a single entity
         *
         * @param  array $ent regexp match: [0] the whole entity, [1] '#' for
         *                    numeric entities
         * @return string the decoded character, unknown entities are
         *                returned untouched
         */
        function decode($ent) {
            if ($ent[1] == '#') {
                return utf8_decode_numeric($ent);
            } elseif (array_key_exists($ent[0],$this->table)) {
                return $this->table[$ent[0]];
            } else {
                return $ent[0];
            }
        }
    }
}

if(!function_exists('utf8_to_unicode')){
    /**
     * Takes an UTF-8 string and returns an array of ints representing the
     * Unicode characters. Astral planes are supported ie. the ints in the
     * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
*
     * If $strict is set to true the function returns false if the input
     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
     * level E_USER_WARNING. This includes a multi-octet sequence that is
     * cut off by the end of the string.
     *
     * Without $strict bad octets are skipped: a multi-octet sequence that is
     * interrupted by an octet which is no continuation is dropped and
     * decoding goes on with the interrupting octet. Illegal codepoints
     * (non-shortest forms, surrogates, values above 0x10FFFF) are passed on
     * in this mode.
     *
     * Note: this function has been modified slightly in this library to
     * trigger errors on encountering bad bytes
     *
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param  string  $str    UTF-8 encoded string
     * @param  boolean $strict Check for invalid sequences?
     * @return mixed array of unicode code points or false if UTF-8 invalid
     * @see    unicode_to_utf8
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function utf8_to_unicode($str,$strict=false) {
        $mState = 0;     // cached expected number of octets after the current octet
                         // until the beginning of the next UTF8 character sequence
        $mUcs4  = 0;     // cached Unicode character
        $mBytes = 1;     // cached expected number of octets in the current sequence

        $out = array();

        $len = strlen($str);

        for($i = 0; $i < $len; $i++) {

            // square brackets: the {} offset syntax no longer parses on PHP 8
            $in = ord($str[$i]);

            if ( $mState == 0) {

                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if (0 == (0x80 & ($in))) {
                    // US-ASCII, pass straight through.
                    $out[] = $in;
                    $mBytes = 1;

                } else if (0xC0 == (0xE0 & ($in))) {
                    // First octet of 2 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;

                } else if (0xE0 == (0xF0 & ($in))) {
                    // First octet of 3 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;

                } else if (0xF0 == (0xF8 & ($in))) {
                    // First octet of 4 octet sequence
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;

                } else if (0xF8 == (0xFC & ($in))) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;

                } else if (0xFC == (0xFE & ($in))) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in);
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;

                } elseif($strict) {
                    /* Current octet is neither in the US-ASCII range nor a legal first
                     * octet of a multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Illegal sequence identifier '.
                                'in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );
                    return false;

                }

            } else {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence
                if (0x80 == (0xC0 & ($in))) {

                    // Legal continuation.
                    $shift = ($mState - 1) * 6;
                    $tmp = $in;
                    $tmp = ($tmp & 0x0000003F) << $shift;
                    $mUcs4 |= $tmp;

                    /**
                     * End of the multi-octet sequence. mUcs4 now contains the final
                     * Unicode codepoint to be output
                     */
                    if (0 == --$mState) {

                        /*
                         * Check for illegal sequences and codepoints.
                         */
                        // From Unicode 3.1, non-shortest form is illegal
                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                            (4 < $mBytes) ||
                            // From Unicode 3.2, surrogate characters are illegal
                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                            // Codepoints outside the Unicode range are illegal
                            ($mUcs4 > 0x10FFFF)) {

                            if($strict){
                                trigger_error(
                                        'utf8_to_unicode: Illegal sequence or codepoint '.
                                            'in UTF-8 at byte '.$i,
                                        E_USER_WARNING
                                    );

                                return false;
                            }

                        }

                        if (0xFEFF != $mUcs4) {
                            // BOM is legal but we don't want to output it
                            $out[] = $mUcs4;
                        }

                        //initialize UTF8 cache
                        $mState = 0;
                        $mUcs4  = 0;
                        $mBytes = 1;
                    }

                } elseif($strict) {
                    /**
                     *((0xC0 & (*in) != 0x80) && (mState != 0))
                     * Incomplete multi-octet sequence.
                     */
                    trigger_error(
                            'utf8_to_unicode: Incomplete multi-octet '.
                            '   sequence in UTF-8 at byte '.$i,
                            E_USER_WARNING
                        );

                    return false;

                } else {
                    // Lenient mode: give up on the broken sequence and look at
                    // this octet again as the possible start of a new one.
                    // Without this every octet up to the next continuation
                    // octet would be swallowed.
                    $mState = 0;
                    $mUcs4  = 0;
                    $mBytes = 1;
                    $i--;
                }
            }
        }

        // the string ended in the middle of a multi-octet sequence
        if ($strict && $mState != 0) {
            trigger_error(
                    'utf8_to_unicode: Incomplete multi-octet '.
                    '   sequence in UTF-8 at byte '.$i,
                    E_USER_WARNING
                );

            return false;
        }

        return $out;
    }
}

if(!function_exists('unicode_to_utf8')){
    /**
     * Takes an array of ints representing the Unicode characters and returns
     * a UTF-8 string. Astral planes are supported ie. the ints in the
     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
     * are not allowed.
     *
     * If $strict is set to true the function returns false if the input
     * array contains ints that represent surrogates or are outside the
     * Unicode range and raises a PHP error at level E_USER_WARNING
     *
     * Note: this function has been modified slightly in this library to use
     * output buffering to concatenate the UTF-8 string (faster) as well as
     * reference the array by its keys
     *
     * @param  array of unicode code points representing a string
     * @param  boolean Check for invalid sequences?
* @return mixed UTF-8 string or false if array contains invalid code points
     *               ('' when the first argument is not an array)
     * @author <hsivonen@iki.fi>
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    utf8_to_unicode
     * @link   http://hsivonen.iki.fi/php-utf8/
     * @link   http://sourceforge.net/projects/phputf8/
     */
    function unicode_to_utf8($arr,$strict=false) {
        if (!is_array($arr)) return '';
        ob_start();

        foreach (array_keys($arr) as $k) {
            $cp = $arr[$k];

            # Outside of the Unicode range (negative values included)
            if ( ($cp < 0) || ($cp > 0x10ffff) ) {

                if($strict){
                    // drop our buffer before leaving, otherwise it would keep
                    // swallowing the output of the caller
                    ob_end_clean();
                    trigger_error(
                        'unicode_to_utf8: Codepoint out of Unicode range '.
                        'at index: '.$k.', value: '.$arr[$k],
                        E_USER_WARNING
                        );

                    // out of range
                    return false;
                }

            # ASCII range (including control chars)
            } else if ($cp <= 0x007f) {

                echo chr($cp);

            # 2 byte sequence
            } else if ($cp <= 0x07ff) {

                echo chr(0xc0 | ($cp >> 6));
                echo chr(0x80 | ($cp & 0x003f));

            # Byte order mark (skip)
            } else if($cp == 0xFEFF) {

                // nop -- zap the BOM

            # Test for illegal surrogates
            } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {

                // found a surrogate
                if($strict){
                    ob_end_clean();
                    trigger_error(
                        'unicode_to_utf8: Illegal surrogate '.
                            'at index: '.$k.', value: '.$arr[$k],
                        E_USER_WARNING
                        );
                    return false;
                }

            # 3 byte sequence
            } else if ($cp <= 0xffff) {

                echo chr(0xe0 | ($cp >> 12));
                echo chr(0x80 | (($cp >> 6) & 0x003f));
                echo chr(0x80 | ($cp & 0x003f));

            # 4 byte sequence
            } else {

                echo chr(0xf0 | ($cp >> 18));
                echo chr(0x80 | (($cp >> 12) & 0x3f));
                echo chr(0x80 | (($cp >> 6) & 0x3f));
                echo chr(0x80 | ($cp & 0x3f));
            }
        }

        $result = ob_get_contents();
        ob_end_clean();
        return $result;
    }
}

if(!function_exists('utf8_to_utf16be')){
    /**
     * UTF-8 to UTF-16BE conversion.
*
     * Uses mb_string when available. Otherwise the string is converted by
     * hand: code points above 0xFFFF are written as surrogate pairs and
     * values above 0x10FFFF are skipped.
     *
     * @param  string $str UTF-8 encoded string (passed by reference, not modified)
     * @param  bool   $bom prepend the big endian byte order mark?
     * @return string UTF-16BE encoded string
     */
    function utf8_to_utf16be(&$str, $bom = false) {
        $out = $bom ? "\xFE\xFF" : '';
        if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

        $uni = utf8_to_unicode($str);
        foreach($uni as $cp){
            if($cp > 0x10FFFF) continue;   // can not be expressed in UTF-16

            if($cp > 0xFFFF){
                // astral planes need a high and a low surrogate
                $cp  -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            }else{
                $out .= pack('n',$cp);
            }
        }
        return $out;
    }
}

if(!function_exists('utf16be_to_utf8')){
    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * Surrogate pairs are combined into a single code point. What is left
     * over is handed to unicode_to_utf8() as is.
     *
     * @param  string $str UTF-16BE encoded string (passed by reference, not modified)
     * @return string UTF-8 encoded string
     */
    function utf16be_to_utf8(&$str) {
        $units = unpack('n*',$str);
        if(!is_array($units)) return '';

        $uni  = array();
        $high = null;   // high surrogate waiting for its partner
        foreach($units as $unit){
            if($high !== null){
                if($unit >= 0xDC00 && $unit <= 0xDFFF){
                    $uni[] = 0x10000 + (($high - 0xD800) << 10) + ($unit - 0xDC00);
                    $high  = null;
                    continue;
                }
                // no low surrogate followed, pass the lone one on
                $uni[] = $high;
                $high  = null;
            }

            if($unit >= 0xD800 && $unit <= 0xDBFF){
                $high = $unit;
            }else{
                $uni[] = $unit;
            }
        }
        if($high !== null) $uni[] = $high;

        return unicode_to_utf8($uni);
    }
}

if(!function_exists('utf8_bad_replace')){
    /**
     * Replace bad bytes with an alternative character
     *
     * ASCII character is recommended for replacement char
     *
     * PCRE Pattern to locate bad bytes in a UTF-8 string
     * Comes from W3 FAQ: Multilingual Forms
     * Note: modified to include full ASCII range including control chars
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @see    http://www.w3.org/International/questions/qa-forms-utf-8
     * @param  string $str     string to search
     * @param  string $replace string to replace bad bytes with (defaults
     *                         to '', which removes them) - use ASCII
     * @return string
     */
    function utf8_bad_replace($str, $replace = '') {
        $UTF8_BAD =
            '([\x00-\x7F]'.                          # ASCII (including control chars)
            '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
            '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
            '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
            '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
            '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
            '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
            '|(.{1}))';                              # invalid byte

        $result = '';
        $len    = strlen($str);
        $pos    = 0;

        // One of the alternatives always matches right at $pos, so the string
        // is walked match by match. Searching from an offset avoids copying
        // the remaining string on every step.
        while ($pos < $len && preg_match('/'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
            if ( !isset($matches[2])) {
                $result .= $matches[0];
            } else {
                // only the "invalid byte" alternative fills the second group
                $result .= $replace;
            }
            $pos += strlen($matches[0]);
        }
        return $result;
    }
}

if(!function_exists('utf8_correctIdx')){
    /**
     * adjust a byte index into a utf8 string to a utf8 character boundary
     *
     * @param $str   string   utf8 character string
     * @param $i     int      byte index into $str
     * @param $next  bool     direction to search for boundary,
     *                           false = up (current character)
     *                           true = down (next character)
     *
     * @return int            byte index into $str now pointing to a utf8 character boundary
     *                        (0 for indexes <= 0, the string length for indexes beyond the end)
     *
     * @author       chris smith <chris@jalakai.co.uk>
     */
    function utf8_correctIdx(&$str,$i,$next=false) {

        if ($i <= 0) return 0;

        $limit = strlen($str);
        if ($i>=$limit) return $limit;

        // bytes of the form 10xxxxxx are continuation bytes, step over them
        if ($next) {
            while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
        } else {
            while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
        }

        return $i;
    }
}

// only needed if no mb_string available
if(!UTF8_MBSTRING){
    /**
     * UTF-8 Case lookup table
     *
     * This lookuptable maps the lower case letters to their corresponding
     * upper case letter in UTF-8
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_LOWER_TO_UPPER;
    if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array(
            "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q",
            "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G",
            "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ",
            "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ",
"ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 976 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 977 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 978 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 979 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 980 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 981 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 982 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 983 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 984 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 985 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 986 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 987 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 988 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 989 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 990 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 991 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 992 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 993 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 994 "ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 995 "ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 996 
"ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 997 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 998 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 999 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 1000 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 1001 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 1002 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1003 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1004 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1005 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1006 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1007 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1008 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1009 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1010 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1011 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1012 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1013 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1014 "ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1015 "ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1016 
"ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1017 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1018 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1019 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1020 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1021 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1022 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1023 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1024 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1025 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1026 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1027 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1028 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1029 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1030 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1031 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1032 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1033 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1034 "ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1035 "á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 1036 
"s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 1037 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 1038 ); 1039 1040 /** 1041 * UTF-8 Case lookup table 1042 * 1043 * This lookuptable defines the lower case letters to their correspponding 1044 * upper case letter in UTF-8 1045 * 1046 * @author Andreas Gohr <andi@splitbrain.org> 1047 */ 1048 global $UTF8_UPPER_TO_LOWER; 1049 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1050 "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", 1051 "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", 1052 "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", 1053 "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", 1054 "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", 1055 "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", 1056 "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", 1057 "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", 1058 "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", 1059 "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", 1060 "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", 1061 "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ", 1062 "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ", 1063 "Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố", 1064 "Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ", 1065 
"Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ", 1066 "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ", 1067 "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ", 1068 "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ", 1069 "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ", 1070 "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ", 1071 "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ", 1072 "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ", 1073 "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ", 1074 "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս", 1075 "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ", 1076 "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ", 1077 "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ", 1078 "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ", 1079 "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ", 1080 "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ", 1081 "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ", 1082 "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ", 1083 "Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ", 1084 "Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ", 1085 
"Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ", 1086 "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ", 1087 "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ", 1088 "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п", 1089 "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е", 1090 "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ", 1091 "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ", 1092 "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ", 1093 "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π", 1094 "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ", 1095 "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ", 1096 "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ", 1097 "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ", 1098 "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș", 1099 "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ", 1100 "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ", 1101 "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ", 1102 "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ", 1103 "Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ", 1104 "Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž", 1105 
"Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ", 1106 "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ", 1107 "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ", 1108 "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī", 1109 "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė", 1110 "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă", 1111 "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö", 1112 "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì", 1113 "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â", 1114 "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1115 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1116 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1117 ); 1118}; // end of case lookup tables 1119 1120/** 1121 * UTF-8 lookup table for lower case accented letters 1122 * 1123 * This lookuptable defines replacements for accented characters from the ASCII-7 1124 * range. This are lower case letters only. 
1125 * 1126 * @author Andreas Gohr <andi@splitbrain.org> 1127 * @see utf8_deaccent() 1128 */ 1129global $UTF8_LOWER_ACCENTS; 1130if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array( 1131 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', 1132 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', 1133 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', 1134 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', 1135 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', 1136 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', 1137 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', 1138 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', 1139 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', 1140 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', 1141 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', 1142 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', 1143 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', 1144 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', 1145 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', 1146); 1147 1148/** 1149 * UTF-8 lookup table for upper case accented letters 1150 * 1151 * This lookuptable defines replacements for accented characters from the ASCII-7 1152 * range. This are upper case letters only. 
1153 * 1154 * @author Andreas Gohr <andi@splitbrain.org> 1155 * @see utf8_deaccent() 1156 */ 1157global $UTF8_UPPER_ACCENTS; 1158if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array( 1159 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', 1160 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', 1161 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', 1162 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', 1163 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', 1164 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', 1165 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', 1166 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', 1167 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', 1168 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', 1169 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', 1170 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', 1171 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', 1172 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', 1173 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', 1174); 1175 1176/** 1177 * UTF-8 array of common special characters 1178 * 1179 * This array should contain all special characters (not a letter or digit) 1180 * defined in the various local charsets - it's not a complete list of non-alphanum 1181 * characters in UTF-8. It's not perfect but should match most cases of special 1182 * chars. 1183 * 1184 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! 
1185 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a 1186 * 1187 * @author Andreas Gohr <andi@splitbrain.org> 1188 * @see utf8_stripspecials() 1189 */ 1190global $UTF8_SPECIAL_CHARS; 1191if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( 1192 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 1193 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 1194 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 1195 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 1196 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 1197 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 1198 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 1199 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 1200 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 1201 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 1202 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 1203 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 1204 0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 1205 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 1206 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1207 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1208 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1209 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1210 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1211 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1212 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 
0x2200, 0x2202, 1213 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1214 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1215 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1216 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1217 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1218 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1219 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1220 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1221 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1222 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1223 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1224 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1225 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1226 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1227 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1228 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1229 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1230 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1231 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1232 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1233 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1234 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1235 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1236 0x27b4, 0x27b5, 0x27b6, 
0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1237 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1238 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1239 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1240 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1241 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1242 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1243 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1244 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1245 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1246 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1247 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1248 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1249 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1250 0xffeb, 0xffec, 0xffed, 0xffee, 1251 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1252 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1253 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1254 0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1255 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1256); 1257 1258// utf8 version of above data 1259global $UTF8_SPECIAL_CHARS2; 1260if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1261 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ �'. 1262 '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. 1263 '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1264 '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. 1265 '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. 1266 '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. 1267 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. 1268 '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. 1269 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. 
1270 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1271 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1272 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1273 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1274 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1275 '➷➸➹➺➻➼➽➾'. 1276 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1277 '�'. 1278 '�ﹼﹽ'. 1279 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1280 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1281 ''. 1282 ' '; 1283 1284/** 1285 * Romanization lookup table 1286 * 1287 * This lookup tables provides a way to transform strings written in a language 1288 * different from the ones based upon latin letters into plain ASCII. 1289 * 1290 * Please note: this is not a scientific transliteration table. It only works 1291 * oneway from nonlatin to ASCII and it works by simple character replacement 1292 * only. Specialities of each language are not supported. 1293 * 1294 * @author Andreas Gohr <andi@splitbrain.org> 1295 * @author Vitaly Blokhin <vitinfo@vitn.com> 1296 * @link http://www.uconv.com/translit.htm 1297 * @author Bisqwit <bisqwit@iki.fi> 1298 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1299 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1300 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1301 * @link http://www.btranslations.com/resources/romanization/korean.asp 1302 * @author Arthit Suriyawongkul <arthit@gmail.com> 1303 * @author Denis Scheither <amorphis@uni-bremen.de> 1304 */ 1305global $UTF8_ROMANIZATION; 1306if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1307 // scandinavian - differs from what we do in deaccent 1308 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1309 1310 //russian cyrillic 1311 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1312 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1313 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1314 'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1315 
'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1316 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1317 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1318 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1319 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1320 // Ukrainian cyrillic 1321 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1322 // Georgian 1323 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1324 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1325 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1326 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1327 'ჰ'=>'xh', 1328 //Sanskrit 1329 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1330 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1331 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1332 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1333 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1334 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1335 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1336 //Hebrew 1337 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1338 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1339 'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1340 'ש'=>'sh','ת'=>'t', 1341 //Arabic 1342 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1343 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1344 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1345 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1346 1347 // Japanese characters (last update: 2008-05-09) 1348 
1349 // Japanese hiragana 1350 1351 // 3 character syllables, っ doubles the consonant after 1352 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1353 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1354 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1355 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1356 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1357 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1358 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1359 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1360 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1361 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1362 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1363 1364 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1365 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1366 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1367 1368 // 2 character syllables - normal 1369 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1370 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1371 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1372 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1373 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1374 'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1375 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1376 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1377 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1378 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1379 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1380 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1381 'うぇ'=>'we','うぃ'=>'wi', 
1382 'いぇ'=>'ye', 1383 1384 // 2 character syllables, っ doubles the consonant after 1385 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1386 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1387 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1388 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1389 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1390 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1391 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1392 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1393 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1394 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1395 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1396 1397 // 1 character syllabels 1398 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1399 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1400 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1401 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1402 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1403 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1404 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1405 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1406 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1407 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1408 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1409 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1410 'わ'=>'wa','を'=>'wo', 1411 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1412 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1413 // old characters 1414 'ゑ'=>'we','ゐ'=>'wi', 1415 1416 // convert what's left (probably only kicks in when something's missing above) 1417 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1418 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1419 1420 // never seen one of those (disabled for the moment) 1421 // 
'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1422 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1423 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1424 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1425 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1426 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1427 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1428 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1429 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1430 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1431 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1432 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1433 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1434 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1435 1436 // 'spare' characters from other romanization systems 1437 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1438 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1439 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1440 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1441 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1442 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1443 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1444 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1445 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1446 //'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1447 1448 1449 // Japanese katakana 1450 1451 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1452 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1453 
'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1454 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1455 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1456 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1457 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1458 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1459 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1460 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1461 'ッティー'=>'ttii', 1462 'ッヂィー'=>'ddii', 1463 1464 // 3 character syllables - doubled vowels 1465 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1466 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1467 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1468 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1469 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1470 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1471 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1472 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1473 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1474 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1475 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1476 'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1477 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1478 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1479 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1480 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1481 
'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1482 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1483 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1484 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1485 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1486 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1487 'ウェー'=>'wee','ウィー'=>'wii', 1488 'イェー'=>'yee', 1489 'ティー'=>'tii', 1490 'ヂィー'=>'dii', 1491 1492 // 3 character syllables - doubled consonants 1493 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1494 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1495 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1496 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1497 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1498 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1499 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1500 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1501 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1502 'ッティ'=>'tti', 1503 'ッヂィ'=>'ddi', 1504 1505 // 3 character syllables - doubled vowel and consonants 1506 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1507 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1508 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1509 'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1510 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1511 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1512 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1513 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1514 
'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1515 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1516 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1517 1518 // 2 character syllables - normal 1519 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1520 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1521 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1522 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1523 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1524 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1525 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1526 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1527 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1528 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1529 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1530 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1531 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1532 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1533 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1534 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1535 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1536 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1537 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1538 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1539 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1540 'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1541 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1542 'ウェ'=>'we','ウィ'=>'wi', 1543 'イェ'=>'ye', 1544 'ティ'=>'ti', 1545 'ヂィ'=>'di', 1546 1547 // 2 character syllables - doubled vocal 1548 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1549 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 
1550 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1551 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1552 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1553 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1554 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1555 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1556 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1557 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1558 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1559 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1560 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1561 'ワー'=>'waa','ヲー'=>'woo', 1562 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1563 'ヵー'=>'kaa','ヶー'=>'kee', 1564 // old characters 1565 'ヱー'=>'wee','ヰー'=>'wii', 1566 1567 // seperate katakana 'n' 1568 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1569 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1570 1571 // 2 character syllables - doubled consonants 1572 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1573 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1574 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1575 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1576 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1577 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1578 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1579 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1580 'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1581 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1582 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1583 1584 // 1 character syllables 1585 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1586 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1587 
'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1588 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1589 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1590 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1591 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1592 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1593 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1594 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1595 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1596 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1597 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1598 'ワ'=>'wa','ヲ'=>'wo', 1599 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1600 'ヵ'=>'ka','ヶ'=>'ke', 1601 // old characters 1602 'ヱ'=>'we','ヰ'=>'wi', 1603 1604 // convert what's left (probably only kicks in when something's missing above) 1605 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1606 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1607 1608 // special characters 1609 '・'=>'_','、'=>'_', 1610 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1611 1612 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1613 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1614 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1615 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1616 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1617 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1618 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1619 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1620 // 'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1621 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1622 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1623 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1624 1625 // "Greeklish" 1626 
'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1627 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1628 1629 // Thai 1630 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1631 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1632 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1633 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1634 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1635 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1636 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1637 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1638 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1639 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1640 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1641 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1642 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1643 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1644 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1645 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1646 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1647 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1648 1649 // Korean 1650 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1651 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1652 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1653 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1654 'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1655 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1656); 1657 1658//Setup VIM: ex: et ts=2 enc=utf-8 : 1659 1660