1<?php 2/** 3 * UTF8 helper functions 4 * 5 * @license LGPL (http://www.gnu.org/copyleft/lesser.html) 6 * @author Andreas Gohr <andi@splitbrain.org> 7 */ 8 9/** 10 * check for mb_string support 11 */ 12if(!defined('UTF8_MBSTRING')){ 13 if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){ 14 define('UTF8_MBSTRING',1); 15 }else{ 16 define('UTF8_MBSTRING',0); 17 } 18} 19 20if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); } 21 22if(!function_exists('utf8_encodeFN')){ 23 /** 24 * URL-Encode a filename to allow unicodecharacters 25 * 26 * Slashes are not encoded 27 * 28 * When the second parameter is true the string will 29 * be encoded only if non ASCII characters are detected - 30 * This makes it safe to run it multiple times on the 31 * same string (default is true) 32 * 33 * @author Andreas Gohr <andi@splitbrain.org> 34 * @see urlencode 35 */ 36 function utf8_encodeFN($file,$safe=true){ 37 if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){ 38 return $file; 39 } 40 $file = urlencode($file); 41 $file = str_replace('%2F','/',$file); 42 return $file; 43 } 44} 45 46if(!function_exists('utf8_decodeFN')){ 47 /** 48 * URL-Decode a filename 49 * 50 * This is just a wrapper around urldecode 51 * 52 * @author Andreas Gohr <andi@splitbrain.org> 53 * @see urldecode 54 */ 55 function utf8_decodeFN($file){ 56 $file = urldecode($file); 57 return $file; 58 } 59} 60 61if(!function_exists('utf8_isASCII')){ 62 /** 63 * Checks if a string contains 7bit ASCII only 64 * 65 * @author Andreas Haerter <netzmeister@andreas-haerter.de> 66 */ 67 function utf8_isASCII($str){ 68 return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1); 69 } 70} 71 72if(!function_exists('utf8_strip')){ 73 /** 74 * Strips all highbyte chars 75 * 76 * Returns a pure ASCII7 string 77 * 78 * @author Andreas Gohr <andi@splitbrain.org> 79 */ 80 function utf8_strip($str){ 81 $ascii = ''; 82 for($i=0; $i<strlen($str); $i++){ 83 if(ord($str{$i}) <128){ 84 $ascii .= $str{$i}; 85 } 86 } 87 return $ascii; 88 } 89} 90 91if(!function_exists('utf8_check')){ 92 /** 93 * Tries to detect if a string is in Unicode encoding 94 * 95 * @author <bmorel@ssi.fr> 96 * @link http://www.php.net/manual/en/function.utf8-encode.php 97 */ 98 function utf8_check($Str) { 99 for ($i=0; $i<strlen($Str); $i++) { 100 $b = ord($Str[$i]); 101 if ($b < 0x80) continue; # 0bbbbbbb 102 elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb 103 elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb 104 elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb 105 elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb 106 elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b 107 else return false; # Does not match any model 108 109 for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ? 110 if ((++$i == strlen($Str)) || ((ord($Str[$i]) & 0xC0) != 0x80)) 111 return false; 112 } 113 } 114 return true; 115 } 116} 117 118if(!function_exists('utf8_strlen')){ 119 /** 120 * Unicode aware replacement for strlen() 121 * 122 * utf8_decode() converts characters that are not in ISO-8859-1 123 * to '?', which, for the purpose of counting, is alright - It's 124 * even faster than mb_strlen. 125 * 126 * @author <chernyshevsky at hotmail dot com> 127 * @see strlen() 128 * @see utf8_decode() 129 */ 130 function utf8_strlen($string){ 131 return strlen(utf8_decode($string)); 132 } 133} 134 135if(!function_exists('utf8_substr')){ 136 /** 137 * UTF-8 aware alternative to substr 138 * 139 * Return part of a string given character offset (and optionally length) 140 * 141 * @author Harry Fuecks <hfuecks@gmail.com> 142 * @author Chris Smith <chris@jalakai.co.uk> 143 * @param string 144 * @param integer number of UTF-8 characters offset (from left) 145 * @param integer (optional) length in UTF-8 characters from offset 146 * @return mixed string or false if failure 147 */ 148 function utf8_substr($str, $offset, $length = null) { 149 if(UTF8_MBSTRING){ 150 if( $length === null ){ 151 return mb_substr($str, $offset); 152 }else{ 153 return mb_substr($str, $offset, $length); 154 } 155 } 156 157 /* 158 * Notes: 159 * 160 * no mb string support, so we'll use pcre regex's with 'u' flag 161 * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for 162 * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536) 163 * 164 * substr documentation states false can be returned in some cases (e.g. offset > string length) 165 * mb_substr never returns false, it will return an empty string instead. 166 * 167 * calculating the number of characters in the string is a relatively expensive operation, so 168 * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length 169 */ 170 171 // cast parameters to appropriate types to avoid multiple notices/warnings 172 $str = (string)$str; // generates E_NOTICE for PHP4 objects, but not PHP5 objects 173 $offset = (int)$offset; 174 if (!is_null($length)) $length = (int)$length; 175 176 // handle trivial cases 177 if ($length === 0) return ''; 178 if ($offset < 0 && $length < 0 && $length < $offset) return ''; 179 180 $offset_pattern = ''; 181 $length_pattern = ''; 182 183 // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!) 184 if ($offset < 0) { 185 $strlen = strlen(utf8_decode($str)); // see notes 186 $offset = $strlen + $offset; 187 if ($offset < 0) $offset = 0; 188 } 189 190 // establish a pattern for offset, a non-captured group equal in length to offset 191 if ($offset > 0) { 192 $Ox = (int)($offset/65535); 193 $Oy = $offset%65535; 194 195 if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}'; 196 $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})'; 197 } else { 198 $offset_pattern = '^'; // offset == 0; just anchor the pattern 199 } 200 201 // establish a pattern for length 202 if (is_null($length)) { 203 $length_pattern = '(.*)$'; // the rest of the string 204 } else { 205 206 if (!isset($strlen)) $strlen = strlen(utf8_decode($str)); // see notes 207 if ($offset > $strlen) return ''; // another trivial case 208 209 if ($length > 0) { 210 211 $length = min($strlen-$offset, $length); // reduce any length that would go passed the end of the string 212 213 $Lx = (int)($length/65535); 214 $Ly = $length%65535; 215 216 // +ve length requires ... a captured group of length characters 217 if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}'; 218 $length_pattern = '('.$length_pattern.'.{'.$Ly.'})'; 219 220 } else if ($length < 0) { 221 222 if ($length < ($offset - $strlen)) return ''; 223 224 $Lx = (int)((-$length)/65535); 225 $Ly = (-$length)%65535; 226 227 // -ve length requires ... capture everything except a group of -length characters 228 // anchored at the tail-end of the string 229 if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}'; 230 $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$'; 231 } 232 } 233 234 if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return ''; 235 return $match[1]; 236 } 237} 238 239if(!function_exists('utf8_substr_replace')){ 240 /** 241 * Unicode aware replacement for substr_replace() 242 * 243 * @author Andreas Gohr <andi@splitbrain.org> 244 * @see substr_replace() 245 */ 246 function utf8_substr_replace($string, $replacement, $start , $length=0 ){ 247 $ret = ''; 248 if($start>0) $ret .= utf8_substr($string, 0, $start); 249 $ret .= $replacement; 250 $ret .= utf8_substr($string, $start+$length); 251 return $ret; 252 } 253} 254 255if(!function_exists('utf8_ltrim')){ 256 /** 257 * Unicode aware replacement for ltrim() 258 * 259 * @author Andreas Gohr <andi@splitbrain.org> 260 * @see ltrim() 261 * @return string 262 */ 263 function utf8_ltrim($str,$charlist=''){ 264 if($charlist == '') return ltrim($str); 265 266 //quote charlist for use in a characterclass 267 $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist); 268 269 return preg_replace('/^['.$charlist.']+/u','',$str); 270 } 271} 272 273if(!function_exists('utf8_rtrim')){ 274 /** 275 * Unicode aware replacement for rtrim() 276 * 277 * @author Andreas Gohr <andi@splitbrain.org> 278 * @see rtrim() 279 * @return string 280 */ 281 function utf8_rtrim($str,$charlist=''){ 282 if($charlist == '') return rtrim($str); 283 284 //quote charlist for use in a characterclass 285 $charlist = preg_replace('!([\\\\\\-\\]\\[/])!','\\\${1}',$charlist); 286 287 return preg_replace('/['.$charlist.']+$/u','',$str); 288 } 289} 290 291if(!function_exists('utf8_trim')){ 292 /** 293 * Unicode aware replacement for trim() 294 * 295 * @author Andreas Gohr <andi@splitbrain.org> 296 * @see trim() 297 * @return string 298 */ 299 function utf8_trim($str,$charlist='') { 300 if($charlist == '') return trim($str); 301 302 return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist); 303 } 304} 305 306if(!function_exists('utf8_strtolower')){ 307 /** 308 * This is a unicode aware replacement for strtolower() 309 * 310 * Uses mb_string extension if available 311 * 312 * @author Leo Feyer <leo@typolight.org> 313 * @see strtolower() 314 * @see utf8_strtoupper() 315 */ 316 function utf8_strtolower($string){ 317 if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8'); 318 319 global $UTF8_UPPER_TO_LOWER; 320 return strtr($string,$UTF8_UPPER_TO_LOWER); 321 } 322} 323 324if(!function_exists('utf8_strtoupper')){ 325 /** 326 * This is a unicode aware replacement for strtoupper() 327 * 328 * Uses mb_string extension if available 329 * 330 * @author Leo Feyer <leo@typolight.org> 331 * @see strtoupper() 332 * @see utf8_strtoupper() 333 */ 334 function utf8_strtoupper($string){ 335 if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8'); 336 337 global $UTF8_LOWER_TO_UPPER; 338 return strtr($string,$UTF8_LOWER_TO_UPPER); 339 } 340} 341 342if(!function_exists('utf8_ucfirst')){ 343 /** 344 * UTF-8 aware alternative to ucfirst 345 * Make a string's first character uppercase 346 * 347 * @author Harry Fuecks 348 * @param string 349 * @return string with first character as upper case (if applicable) 350 */ 351 function utf8_ucfirst($str){ 352 switch ( utf8_strlen($str) ) { 353 case 0: 354 return ''; 355 case 1: 356 return utf8_strtoupper($str); 357 default: 358 preg_match('/^(.{1})(.*)$/us', $str, $matches); 359 return utf8_strtoupper($matches[1]).$matches[2]; 360 } 361 } 362} 363 364if(!function_exists('utf8_ucwords')){ 365 /** 366 * UTF-8 aware alternative to ucwords 367 * Uppercase the first character of each word in a string 368 * 369 * @author Harry Fuecks 370 * @param string 371 * @return string with first char of each word uppercase 372 * @see http://www.php.net/ucwords 373 */ 374 function utf8_ucwords($str) { 375 // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches; 376 // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns 377 // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords 378 $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u'; 379 380 return preg_replace_callback($pattern, 'utf8_ucwords_callback',$str); 381 } 382 383 /** 384 * Callback function for preg_replace_callback call in utf8_ucwords 385 * You don't need to call this yourself 386 * 387 * @author Harry Fuecks 388 * @param array of matches corresponding to a single word 389 * @return string with first char of the word in uppercase 390 * @see utf8_ucwords 391 * @see utf8_strtoupper 392 */ 393 function utf8_ucwords_callback($matches) { 394 $leadingws = $matches[2]; 395 $ucfirst = utf8_strtoupper($matches[3]); 396 $ucword = utf8_substr_replace(ltrim($matches[0]),$ucfirst,0,1); 397 return $leadingws . $ucword; 398 } 399} 400 401if(!function_exists('utf8_deaccent')){ 402 /** 403 * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents 404 * 405 * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1) 406 * letters. Default is to deaccent both cases ($case = 0) 407 * 408 * @author Andreas Gohr <andi@splitbrain.org> 409 */ 410 function utf8_deaccent($string,$case=0){ 411 if($case <= 0){ 412 global $UTF8_LOWER_ACCENTS; 413 $string = strtr($string,$UTF8_LOWER_ACCENTS); 414 } 415 if($case >= 0){ 416 global $UTF8_UPPER_ACCENTS; 417 $string = strtr($string,$UTF8_UPPER_ACCENTS); 418 } 419 return $string; 420 } 421} 422 423if(!function_exists('utf8_romanize')){ 424 /** 425 * Romanize a non-latin string 426 * 427 * @author Andreas Gohr <andi@splitbrain.org> 428 */ 429 function utf8_romanize($string){ 430 if(utf8_isASCII($string)) return $string; //nothing to do 431 432 global $UTF8_ROMANIZATION; 433 return strtr($string,$UTF8_ROMANIZATION); 434 } 435} 436 437if(!function_exists('utf8_stripspecials')){ 438 /** 439 * Removes special characters (nonalphanumeric) from a UTF-8 string 440 * 441 * This function adds the controlchars 0x00 to 0x19 to the array of 442 * stripped chars (they are not included in $UTF8_SPECIAL_CHARS) 443 * 444 * @author Andreas Gohr <andi@splitbrain.org> 445 * @param string $string The UTF8 string to strip of special chars 446 * @param string $repl Replace special with this string 447 * @param string $additional Additional chars to strip (used in regexp char class) 448 */ 449 function utf8_stripspecials($string,$repl='',$additional=''){ 450 global $UTF8_SPECIAL_CHARS; 451 global $UTF8_SPECIAL_CHARS2; 452 453 static $specials = null; 454 if(is_null($specials)){ 455 #$specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/'); 456 $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/'); 457 } 458 459 return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string); 460 } 461} 462 463if(!function_exists('utf8_strpos')){ 464 /** 465 * This is an Unicode aware replacement for strpos 466 * 467 * @author Leo Feyer <leo@typolight.org> 468 * @see strpos() 469 * @param string 470 * @param string 471 * @param integer 472 * @return integer 473 */ 474 function utf8_strpos($haystack, $needle, $offset=0){ 475 $comp = 0; 476 $length = null; 477 478 while (is_null($length) || $length < $offset) { 479 $pos = strpos($haystack, $needle, $offset + $comp); 480 481 if ($pos === false) 482 return false; 483 484 $length = utf8_strlen(substr($haystack, 0, $pos)); 485 486 if ($length < $offset) 487 $comp = $pos - $length; 488 } 489 490 return $length; 491 } 492} 493 494if(!function_exists('utf8_tohtml')){ 495 /** 496 * Encodes UTF-8 characters to HTML entities 497 * 498 * @author Tom N Harris <tnharris@whoopdedo.org> 499 * @author <vpribish at shopping dot com> 500 * @link http://www.php.net/manual/en/function.utf8-decode.php 501 */ 502 function utf8_tohtml ($str) { 503 $ret = ''; 504 foreach (utf8_to_unicode($str) as $cp) { 505 if ($cp < 0x80) 506 $ret .= chr($cp); 507 elseif ($cp < 0x100) 508 $ret .= "&#$cp;"; 509 else 510 $ret .= '&#x'.dechex($cp).';'; 511 } 512 return $ret; 513 } 514} 515 516if(!function_exists('utf8_unhtml')){ 517 /** 518 * Decodes HTML entities to UTF-8 characters 519 * 520 * Convert any &#..; entity to a codepoint, 521 * The entities flag defaults to only decoding numeric entities. 522 * Pass HTML_ENTITIES and named entities, including & < etc. 523 * are handled as well. Avoids the problem that would occur if you 524 * had to decode "&#38;&amp;#38;" 525 * 526 * unhtmlspecialchars(utf8_unhtml($s)) -> "&&" 527 * utf8_unhtml(unhtmlspecialchars($s)) -> "&&#38;" 528 * what it should be -> "&&#38;" 529 * 530 * @author Tom N Harris <tnharris@whoopdedo.org> 531 * @param string $str UTF-8 encoded string 532 * @param boolean $entities Flag controlling decoding of named entities. 533 * @return UTF-8 encoded string with numeric (and named) entities replaced. 534 */ 535 function utf8_unhtml($str, $entities=null) { 536 static $decoder = null; 537 if (is_null($decoder)) 538 $decoder = new utf8_entity_decoder(); 539 if (is_null($entities)) 540 return preg_replace_callback('/(&#([Xx])?([0-9A-Za-z]+);)/m', 541 'utf8_decode_numeric', $str); 542 else 543 return preg_replace_callback('/&(#)?([Xx])?([0-9A-Za-z]+);/m', 544 array(&$decoder, 'decode'), $str); 545 } 546} 547 548if(!function_exists('utf8_decode_numeric')){ 549 function utf8_decode_numeric($ent) { 550 switch ($ent[2]) { 551 case 'X': 552 case 'x': 553 $cp = hexdec($ent[3]); 554 break; 555 default: 556 $cp = intval($ent[3]); 557 break; 558 } 559 return unicode_to_utf8(array($cp)); 560 } 561} 562 563if(!class_exists('utf8_entity_decoder')){ 564 class utf8_entity_decoder { 565 var $table; 566 function utf8_entity_decoder() { 567 $table = get_html_translation_table(HTML_ENTITIES); 568 $table = array_flip($table); 569 $this->table = array_map(array(&$this,'makeutf8'), $table); 570 } 571 function makeutf8($c) { 572 return unicode_to_utf8(array(ord($c))); 573 } 574 function decode($ent) { 575 if ($ent[1] == '#') { 576 return utf8_decode_numeric($ent); 577 } elseif (array_key_exists($ent[0],$this->table)) { 578 return $this->table[$ent[0]]; 579 } else { 580 return $ent[0]; 581 } 582 } 583 } 584} 585 586if(!function_exists('utf8_to_unicode')){ 587 /** 588 * Takes an UTF-8 string and returns an array of ints representing the 589 * Unicode characters. Astral planes are supported ie. the ints in the 590 * output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates 591 * are not allowed. 592 * 593 * If $strict is set to true the function returns false if the input 594 * string isn't a valid UTF-8 octet sequence and raises a PHP error at 595 * level E_USER_WARNING 596 * 597 * Note: this function has been modified slightly in this library to 598 * trigger errors on encountering bad bytes 599 * 600 * @author <hsivonen@iki.fi> 601 * @author Harry Fuecks <hfuecks@gmail.com> 602 * @param string UTF-8 encoded string 603 * @param boolean Check for invalid sequences? 604 * @return mixed array of unicode code points or false if UTF-8 invalid 605 * @see unicode_to_utf8 606 * @link http://hsivonen.iki.fi/php-utf8/ 607 * @link http://sourceforge.net/projects/phputf8/ 608 */ 609 function utf8_to_unicode($str,$strict=false) { 610 $mState = 0; // cached expected number of octets after the current octet 611 // until the beginning of the next UTF8 character sequence 612 $mUcs4 = 0; // cached Unicode character 613 $mBytes = 1; // cached expected number of octets in the current sequence 614 615 $out = array(); 616 617 $len = strlen($str); 618 619 for($i = 0; $i < $len; $i++) { 620 621 $in = ord($str{$i}); 622 623 if ( $mState == 0) { 624 625 // When mState is zero we expect either a US-ASCII character or a 626 // multi-octet sequence. 627 if (0 == (0x80 & ($in))) { 628 // US-ASCII, pass straight through. 629 $out[] = $in; 630 $mBytes = 1; 631 632 } else if (0xC0 == (0xE0 & ($in))) { 633 // First octet of 2 octet sequence 634 $mUcs4 = ($in); 635 $mUcs4 = ($mUcs4 & 0x1F) << 6; 636 $mState = 1; 637 $mBytes = 2; 638 639 } else if (0xE0 == (0xF0 & ($in))) { 640 // First octet of 3 octet sequence 641 $mUcs4 = ($in); 642 $mUcs4 = ($mUcs4 & 0x0F) << 12; 643 $mState = 2; 644 $mBytes = 3; 645 646 } else if (0xF0 == (0xF8 & ($in))) { 647 // First octet of 4 octet sequence 648 $mUcs4 = ($in); 649 $mUcs4 = ($mUcs4 & 0x07) << 18; 650 $mState = 3; 651 $mBytes = 4; 652 653 } else if (0xF8 == (0xFC & ($in))) { 654 /* First octet of 5 octet sequence. 655 * 656 * This is illegal because the encoded codepoint must be either 657 * (a) not the shortest form or 658 * (b) outside the Unicode range of 0-0x10FFFF. 659 * Rather than trying to resynchronize, we will carry on until the end 660 * of the sequence and let the later error handling code catch it. 661 */ 662 $mUcs4 = ($in); 663 $mUcs4 = ($mUcs4 & 0x03) << 24; 664 $mState = 4; 665 $mBytes = 5; 666 667 } else if (0xFC == (0xFE & ($in))) { 668 // First octet of 6 octet sequence, see comments for 5 octet sequence. 669 $mUcs4 = ($in); 670 $mUcs4 = ($mUcs4 & 1) << 30; 671 $mState = 5; 672 $mBytes = 6; 673 674 } elseif($strict) { 675 /* Current octet is neither in the US-ASCII range nor a legal first 676 * octet of a multi-octet sequence. 677 */ 678 trigger_error( 679 'utf8_to_unicode: Illegal sequence identifier '. 680 'in UTF-8 at byte '.$i, 681 E_USER_WARNING 682 ); 683 return false; 684 685 } 686 687 } else { 688 689 // When mState is non-zero, we expect a continuation of the multi-octet 690 // sequence 691 if (0x80 == (0xC0 & ($in))) { 692 693 // Legal continuation. 694 $shift = ($mState - 1) * 6; 695 $tmp = $in; 696 $tmp = ($tmp & 0x0000003F) << $shift; 697 $mUcs4 |= $tmp; 698 699 /** 700 * End of the multi-octet sequence. mUcs4 now contains the final 701 * Unicode codepoint to be output 702 */ 703 if (0 == --$mState) { 704 705 /* 706 * Check for illegal sequences and codepoints. 707 */ 708 // From Unicode 3.1, non-shortest form is illegal 709 if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || 710 ((3 == $mBytes) && ($mUcs4 < 0x0800)) || 711 ((4 == $mBytes) && ($mUcs4 < 0x10000)) || 712 (4 < $mBytes) || 713 // From Unicode 3.2, surrogate characters are illegal 714 (($mUcs4 & 0xFFFFF800) == 0xD800) || 715 // Codepoints outside the Unicode range are illegal 716 ($mUcs4 > 0x10FFFF)) { 717 718 if($strict){ 719 trigger_error( 720 'utf8_to_unicode: Illegal sequence or codepoint '. 721 'in UTF-8 at byte '.$i, 722 E_USER_WARNING 723 ); 724 725 return false; 726 } 727 728 } 729 730 if (0xFEFF != $mUcs4) { 731 // BOM is legal but we don't want to output it 732 $out[] = $mUcs4; 733 } 734 735 //initialize UTF8 cache 736 $mState = 0; 737 $mUcs4 = 0; 738 $mBytes = 1; 739 } 740 741 } elseif($strict) { 742 /** 743 *((0xC0 & (*in) != 0x80) && (mState != 0)) 744 * Incomplete multi-octet sequence. 745 */ 746 trigger_error( 747 'utf8_to_unicode: Incomplete multi-octet '. 748 ' sequence in UTF-8 at byte '.$i, 749 E_USER_WARNING 750 ); 751 752 return false; 753 } 754 } 755 } 756 return $out; 757 } 758} 759 760if(!function_exists('unicode_to_utf8')){ 761 /** 762 * Takes an array of ints representing the Unicode characters and returns 763 * a UTF-8 string. Astral planes are supported ie. the ints in the 764 * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates 765 * are not allowed. 766 * 767 * If $strict is set to true the function returns false if the input 768 * array contains ints that represent surrogates or are outside the 769 * Unicode range and raises a PHP error at level E_USER_WARNING 770 * 771 * Note: this function has been modified slightly in this library to use 772 * output buffering to concatenate the UTF-8 string (faster) as well as 773 * reference the array by it's keys 774 * 775 * @param array of unicode code points representing a string 776 * @param boolean Check for invalid sequences? 777 * @return mixed UTF-8 string or false if array contains invalid code points 778 * @author <hsivonen@iki.fi> 779 * @author Harry Fuecks <hfuecks@gmail.com> 780 * @see utf8_to_unicode 781 * @link http://hsivonen.iki.fi/php-utf8/ 782 * @link http://sourceforge.net/projects/phputf8/ 783 */ 784 function unicode_to_utf8($arr,$strict=false) { 785 if (!is_array($arr)) return ''; 786 ob_start(); 787 788 foreach (array_keys($arr) as $k) { 789 790 # ASCII range (including control chars) 791 if ( ($arr[$k] >= 0) && ($arr[$k] <= 0x007f) ) { 792 793 echo chr($arr[$k]); 794 795 # 2 byte sequence 796 } else if ($arr[$k] <= 0x07ff) { 797 798 echo chr(0xc0 | ($arr[$k] >> 6)); 799 echo chr(0x80 | ($arr[$k] & 0x003f)); 800 801 # Byte order mark (skip) 802 } else if($arr[$k] == 0xFEFF) { 803 804 // nop -- zap the BOM 805 806 # Test for illegal surrogates 807 } else if ($arr[$k] >= 0xD800 && $arr[$k] <= 0xDFFF) { 808 809 // found a surrogate 810 if($strict){ 811 trigger_error( 812 'unicode_to_utf8: Illegal surrogate '. 813 'at index: '.$k.', value: '.$arr[$k], 814 E_USER_WARNING 815 ); 816 return false; 817 } 818 819 # 3 byte sequence 820 } else if ($arr[$k] <= 0xffff) { 821 822 echo chr(0xe0 | ($arr[$k] >> 12)); 823 echo chr(0x80 | (($arr[$k] >> 6) & 0x003f)); 824 echo chr(0x80 | ($arr[$k] & 0x003f)); 825 826 # 4 byte sequence 827 } else if ($arr[$k] <= 0x10ffff) { 828 829 echo chr(0xf0 | ($arr[$k] >> 18)); 830 echo chr(0x80 | (($arr[$k] >> 12) & 0x3f)); 831 echo chr(0x80 | (($arr[$k] >> 6) & 0x3f)); 832 echo chr(0x80 | ($arr[$k] & 0x3f)); 833 834 } elseif($strict) { 835 836 trigger_error( 837 'unicode_to_utf8: Codepoint out of Unicode range '. 838 'at index: '.$k.', value: '.$arr[$k], 839 E_USER_WARNING 840 ); 841 842 // out of range 843 return false; 844 } 845 } 846 847 $result = ob_get_contents(); 848 ob_end_clean(); 849 return $result; 850 } 851} 852 853if(!function_exists('utf8_to_utf16be')){ 854 /** 855 * UTF-8 to UTF-16BE conversion. 856 * 857 * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits 858 */ 859 function utf8_to_utf16be(&$str, $bom = false) { 860 $out = $bom ? "\xFE\xFF" : ''; 861 if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8'); 862 863 $uni = utf8_to_unicode($str); 864 foreach($uni as $cp){ 865 $out .= pack('n',$cp); 866 } 867 return $out; 868 } 869} 870 871if(!function_exists('utf16be_to_utf8')){ 872 /** 873 * UTF-8 to UTF-16BE conversion. 874 * 875 * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits 876 */ 877 function utf16be_to_utf8(&$str) { 878 $uni = unpack('n*',$str); 879 return unicode_to_utf8($uni); 880 } 881} 882 883if(!function_exists('utf8_bad_replace')){ 884 /** 885 * Replace bad bytes with an alternative character 886 * 887 * ASCII character is recommended for replacement char 888 * 889 * PCRE Pattern to locate bad bytes in a UTF-8 string 890 * Comes from W3 FAQ: Multilingual Forms 891 * Note: modified to include full ASCII range including control chars 892 * 893 * @author Harry Fuecks <hfuecks@gmail.com> 894 * @see http://www.w3.org/International/questions/qa-forms-utf-8 895 * @param string to search 896 * @param string to replace bad bytes with (defaults to '?') - use ASCII 897 * @return string 898 */ 899 function utf8_bad_replace($str, $replace = '') { 900 $UTF8_BAD = 901 '([\x00-\x7F]'. # ASCII (including control chars) 902 '|[\xC2-\xDF][\x80-\xBF]'. # non-overlong 2-byte 903 '|\xE0[\xA0-\xBF][\x80-\xBF]'. # excluding overlongs 904 '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # straight 3-byte 905 '|\xED[\x80-\x9F][\x80-\xBF]'. # excluding surrogates 906 '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. # planes 1-3 907 '|[\xF1-\xF3][\x80-\xBF]{3}'. # planes 4-15 908 '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. # plane 16 909 '|(.{1}))'; # invalid byte 910 ob_start(); 911 while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches)) { 912 if ( !isset($matches[2])) { 913 echo $matches[0]; 914 } else { 915 echo $replace; 916 } 917 $str = substr($str,strlen($matches[0])); 918 } 919 $result = ob_get_contents(); 920 ob_end_clean(); 921 return $result; 922 } 923} 924 925if(!function_exists('utf8_correctIdx')){ 926 /** 927 * adjust a byte index into a utf8 string to a utf8 character boundary 928 * 929 * @param $str string utf8 character string 930 * @param $i int byte index into $str 931 * @param $next bool direction to search for boundary, 932 * false = up (current character) 933 * true = down (next character) 934 * 935 * @return int byte index into $str now pointing to a utf8 character boundary 936 * 937 * @author chris smith <chris@jalakai.co.uk> 938 */ 939 function utf8_correctIdx(&$str,$i,$next=false) { 940 941 if ($i <= 0) return 0; 942 943 $limit = strlen($str); 944 if ($i>=$limit) return $limit; 945 946 if ($next) { 947 while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++; 948 } else { 949 while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--; 950 } 951 952 return $i; 953 } 954} 955 956// only needed if no mb_string available 957if(!UTF8_MBSTRING){ 958 /** 959 * UTF-8 Case lookup table 960 * 961 * This lookuptable defines the upper case letters to their correspponding 962 * lower case letter in UTF-8 963 * 964 * @author Andreas Gohr <andi@splitbrain.org> 965 */ 966 global $UTF8_LOWER_TO_UPPER; 967 if(empty($UTF8_LOWER_TO_UPPER)) $UTF8_LOWER_TO_UPPER = array( 968 "z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T","s"=>"S","r"=>"R","q"=>"Q", 969 "p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J","i"=>"I","h"=>"H","g"=>"G", 970 "f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A","ῳ"=>"ῼ","ῥ"=>"Ῥ","ῡ"=>"Ῡ","ῑ"=>"Ῑ", 971 "ῐ"=>"Ῐ","ῃ"=>"ῌ","ι"=>"Ι","ᾳ"=>"ᾼ","ᾱ"=>"Ᾱ","ᾰ"=>"Ᾰ","ᾧ"=>"ᾯ","ᾦ"=>"ᾮ","ᾥ"=>"ᾭ","ᾤ"=>"ᾬ", 972 "ᾣ"=>"ᾫ","ᾢ"=>"ᾪ","ᾡ"=>"ᾩ","ᾗ"=>"ᾟ","ᾖ"=>"ᾞ","ᾕ"=>"ᾝ","ᾔ"=>"ᾜ","ᾓ"=>"ᾛ","ᾒ"=>"ᾚ","ᾑ"=>"ᾙ", 973 "ᾐ"=>"ᾘ","ᾇ"=>"ᾏ","ᾆ"=>"ᾎ","ᾅ"=>"ᾍ","ᾄ"=>"ᾌ","ᾃ"=>"ᾋ","ᾂ"=>"ᾊ","ᾁ"=>"ᾉ","ᾀ"=>"ᾈ","ώ"=>"Ώ", 974 "ὼ"=>"Ὼ","ύ"=>"Ύ","ὺ"=>"Ὺ","ό"=>"Ό","ὸ"=>"Ὸ","ί"=>"Ί","ὶ"=>"Ὶ","ή"=>"Ή","ὴ"=>"Ὴ","έ"=>"Έ", 975 "ὲ"=>"Ὲ","ά"=>"Ά","ὰ"=>"Ὰ","ὧ"=>"Ὧ","ὦ"=>"Ὦ","ὥ"=>"Ὥ","ὤ"=>"Ὤ","ὣ"=>"Ὣ","ὢ"=>"Ὢ","ὡ"=>"Ὡ", 976 "ὗ"=>"Ὗ","ὕ"=>"Ὕ","ὓ"=>"Ὓ","ὑ"=>"Ὑ","ὅ"=>"Ὅ","ὄ"=>"Ὄ","ὃ"=>"Ὃ","ὂ"=>"Ὂ","ὁ"=>"Ὁ","ὀ"=>"Ὀ", 977 "ἷ"=>"Ἷ","ἶ"=>"Ἶ","ἵ"=>"Ἵ","ἴ"=>"Ἴ","ἳ"=>"Ἳ","ἲ"=>"Ἲ","ἱ"=>"Ἱ","ἰ"=>"Ἰ","ἧ"=>"Ἧ","ἦ"=>"Ἦ", 978 "ἥ"=>"Ἥ","ἤ"=>"Ἤ","ἣ"=>"Ἣ","ἢ"=>"Ἢ","ἡ"=>"Ἡ","ἕ"=>"Ἕ","ἔ"=>"Ἔ","ἓ"=>"Ἓ","ἒ"=>"Ἒ","ἑ"=>"Ἑ", 979 "ἐ"=>"Ἐ","ἇ"=>"Ἇ","ἆ"=>"Ἆ","ἅ"=>"Ἅ","ἄ"=>"Ἄ","ἃ"=>"Ἃ","ἂ"=>"Ἂ","ἁ"=>"Ἁ","ἀ"=>"Ἀ","ỹ"=>"Ỹ", 980 "ỷ"=>"Ỷ","ỵ"=>"Ỵ","ỳ"=>"Ỳ","ự"=>"Ự","ữ"=>"Ữ","ử"=>"Ử","ừ"=>"Ừ","ứ"=>"Ứ","ủ"=>"Ủ","ụ"=>"Ụ", 981 "ợ"=>"Ợ","ỡ"=>"Ỡ","ở"=>"Ở","ờ"=>"Ờ","ớ"=>"Ớ","ộ"=>"Ộ","ỗ"=>"Ỗ","ổ"=>"Ổ","ồ"=>"Ồ","ố"=>"Ố", 982 "ỏ"=>"Ỏ","ọ"=>"Ọ","ị"=>"Ị","ỉ"=>"Ỉ","ệ"=>"Ệ","ễ"=>"Ễ","ể"=>"Ể","ề"=>"Ề","ế"=>"Ế","ẽ"=>"Ẽ", 983 "ẻ"=>"Ẻ","ẹ"=>"Ẹ","ặ"=>"Ặ","ẵ"=>"Ẵ","ẳ"=>"Ẳ","ằ"=>"Ằ","ắ"=>"Ắ","ậ"=>"Ậ","ẫ"=>"Ẫ","ẩ"=>"Ẩ", 984 "ầ"=>"Ầ","ấ"=>"Ấ","ả"=>"Ả","ạ"=>"Ạ","ẛ"=>"Ṡ","ẕ"=>"Ẕ","ẓ"=>"Ẓ","ẑ"=>"Ẑ","ẏ"=>"Ẏ","ẍ"=>"Ẍ", 985 "ẋ"=>"Ẋ","ẉ"=>"Ẉ","ẇ"=>"Ẇ","ẅ"=>"Ẅ","ẃ"=>"Ẃ","ẁ"=>"Ẁ","ṿ"=>"Ṿ","ṽ"=>"Ṽ","ṻ"=>"Ṻ","ṹ"=>"Ṹ", 986 "ṷ"=>"Ṷ","ṵ"=>"Ṵ","ṳ"=>"Ṳ","ṱ"=>"Ṱ","ṯ"=>"Ṯ","ṭ"=>"Ṭ","ṫ"=>"Ṫ","ṩ"=>"Ṩ","ṧ"=>"Ṧ","ṥ"=>"Ṥ", 987 "ṣ"=>"Ṣ","ṡ"=>"Ṡ","ṟ"=>"Ṟ","ṝ"=>"Ṝ","ṛ"=>"Ṛ","ṙ"=>"Ṙ","ṗ"=>"Ṗ","ṕ"=>"Ṕ","ṓ"=>"Ṓ","ṑ"=>"Ṑ", 988 "ṏ"=>"Ṏ","ṍ"=>"Ṍ","ṋ"=>"Ṋ","ṉ"=>"Ṉ","ṇ"=>"Ṇ","ṅ"=>"Ṅ","ṃ"=>"Ṃ","ṁ"=>"Ṁ","ḿ"=>"Ḿ","ḽ"=>"Ḽ", 989 "ḻ"=>"Ḻ","ḹ"=>"Ḹ","ḷ"=>"Ḷ","ḵ"=>"Ḵ","ḳ"=>"Ḳ","ḱ"=>"Ḱ","ḯ"=>"Ḯ","ḭ"=>"Ḭ","ḫ"=>"Ḫ","ḩ"=>"Ḩ", 990 "ḧ"=>"Ḧ","ḥ"=>"Ḥ","ḣ"=>"Ḣ","ḡ"=>"Ḡ","ḟ"=>"Ḟ","ḝ"=>"Ḝ","ḛ"=>"Ḛ","ḙ"=>"Ḙ","ḗ"=>"Ḗ","ḕ"=>"Ḕ", 991 "ḓ"=>"Ḓ","ḑ"=>"Ḑ","ḏ"=>"Ḏ","ḍ"=>"Ḍ","ḋ"=>"Ḋ","ḉ"=>"Ḉ","ḇ"=>"Ḇ","ḅ"=>"Ḅ","ḃ"=>"Ḃ","ḁ"=>"Ḁ", 992 "ֆ"=>"Ֆ","օ"=>"Օ","ք"=>"Ք","փ"=>"Փ","ւ"=>"Ւ","ց"=>"Ց","ր"=>"Ր","տ"=>"Տ","վ"=>"Վ","ս"=>"Ս", 993 "ռ"=>"Ռ","ջ"=>"Ջ","պ"=>"Պ","չ"=>"Չ","ո"=>"Ո","շ"=>"Շ","ն"=>"Ն","յ"=>"Յ","մ"=>"Մ","ճ"=>"Ճ", 994 "ղ"=>"Ղ","ձ"=>"Ձ","հ"=>"Հ","կ"=>"Կ","ծ"=>"Ծ","խ"=>"Խ","լ"=>"Լ","ի"=>"Ի","ժ"=>"Ժ","թ"=>"Թ", 995 "ը"=>"Ը","է"=>"Է","զ"=>"Զ","ե"=>"Ե","դ"=>"Դ","գ"=>"Գ","բ"=>"Բ","ա"=>"Ա","ԏ"=>"Ԏ","ԍ"=>"Ԍ", 996 "ԋ"=>"Ԋ","ԉ"=>"Ԉ","ԇ"=>"Ԇ","ԅ"=>"Ԅ","ԃ"=>"Ԃ","ԁ"=>"Ԁ","ӹ"=>"Ӹ","ӵ"=>"Ӵ","ӳ"=>"Ӳ","ӱ"=>"Ӱ", 997 "ӯ"=>"Ӯ","ӭ"=>"Ӭ","ӫ"=>"Ӫ","ө"=>"Ө","ӧ"=>"Ӧ","ӥ"=>"Ӥ","ӣ"=>"Ӣ","ӡ"=>"Ӡ","ӟ"=>"Ӟ","ӝ"=>"Ӝ", 998 "ӛ"=>"Ӛ","ә"=>"Ә","ӗ"=>"Ӗ","ӕ"=>"Ӕ","ӓ"=>"Ӓ","ӑ"=>"Ӑ","ӎ"=>"Ӎ","ӌ"=>"Ӌ","ӊ"=>"Ӊ","ӈ"=>"Ӈ", 999 "ӆ"=>"Ӆ","ӄ"=>"Ӄ","ӂ"=>"Ӂ","ҿ"=>"Ҿ","ҽ"=>"Ҽ","һ"=>"Һ","ҹ"=>"Ҹ","ҷ"=>"Ҷ","ҵ"=>"Ҵ","ҳ"=>"Ҳ", 1000 "ұ"=>"Ұ","ү"=>"Ү","ҭ"=>"Ҭ","ҫ"=>"Ҫ","ҩ"=>"Ҩ","ҧ"=>"Ҧ","ҥ"=>"Ҥ","ң"=>"Ң","ҡ"=>"Ҡ","ҟ"=>"Ҟ", 1001 "ҝ"=>"Ҝ","қ"=>"Қ","ҙ"=>"Ҙ","җ"=>"Җ","ҕ"=>"Ҕ","ғ"=>"Ғ","ґ"=>"Ґ","ҏ"=>"Ҏ","ҍ"=>"Ҍ","ҋ"=>"Ҋ", 1002 "ҁ"=>"Ҁ","ѿ"=>"Ѿ","ѽ"=>"Ѽ","ѻ"=>"Ѻ","ѹ"=>"Ѹ","ѷ"=>"Ѷ","ѵ"=>"Ѵ","ѳ"=>"Ѳ","ѱ"=>"Ѱ","ѯ"=>"Ѯ", 1003 "ѭ"=>"Ѭ","ѫ"=>"Ѫ","ѩ"=>"Ѩ","ѧ"=>"Ѧ","ѥ"=>"Ѥ","ѣ"=>"Ѣ","ѡ"=>"Ѡ","џ"=>"Џ","ў"=>"Ў","ѝ"=>"Ѝ", 1004 "ќ"=>"Ќ","ћ"=>"Ћ","њ"=>"Њ","љ"=>"Љ","ј"=>"Ј","ї"=>"Ї","і"=>"І","ѕ"=>"Ѕ","є"=>"Є","ѓ"=>"Ѓ", 1005 "ђ"=>"Ђ","ё"=>"Ё","ѐ"=>"Ѐ","я"=>"Я","ю"=>"Ю","э"=>"Э","ь"=>"Ь","ы"=>"Ы","ъ"=>"Ъ","щ"=>"Щ", 1006 "ш"=>"Ш","ч"=>"Ч","ц"=>"Ц","х"=>"Х","ф"=>"Ф","у"=>"У","т"=>"Т","с"=>"С","р"=>"Р","п"=>"П", 1007 "о"=>"О","н"=>"Н","м"=>"М","л"=>"Л","к"=>"К","й"=>"Й","и"=>"И","з"=>"З","ж"=>"Ж","е"=>"Е", 1008 "д"=>"Д","г"=>"Г","в"=>"В","б"=>"Б","а"=>"А","ϵ"=>"Ε","ϲ"=>"Σ","ϱ"=>"Ρ","ϰ"=>"Κ","ϯ"=>"Ϯ", 1009 "ϭ"=>"Ϭ","ϫ"=>"Ϫ","ϩ"=>"Ϩ","ϧ"=>"Ϧ","ϥ"=>"Ϥ","ϣ"=>"Ϣ","ϡ"=>"Ϡ","ϟ"=>"Ϟ","ϝ"=>"Ϝ","ϛ"=>"Ϛ", 1010 "ϙ"=>"Ϙ","ϖ"=>"Π","ϕ"=>"Φ","ϑ"=>"Θ","ϐ"=>"Β","ώ"=>"Ώ","ύ"=>"Ύ","ό"=>"Ό","ϋ"=>"Ϋ","ϊ"=>"Ϊ", 1011 "ω"=>"Ω","ψ"=>"Ψ","χ"=>"Χ","φ"=>"Φ","υ"=>"Υ","τ"=>"Τ","σ"=>"Σ","ς"=>"Σ","ρ"=>"Ρ","π"=>"Π", 1012 "ο"=>"Ο","ξ"=>"Ξ","ν"=>"Ν","μ"=>"Μ","λ"=>"Λ","κ"=>"Κ","ι"=>"Ι","θ"=>"Θ","η"=>"Η","ζ"=>"Ζ", 1013 "ε"=>"Ε","δ"=>"Δ","γ"=>"Γ","β"=>"Β","α"=>"Α","ί"=>"Ί","ή"=>"Ή","έ"=>"Έ","ά"=>"Ά","ʒ"=>"Ʒ", 1014 "ʋ"=>"Ʋ","ʊ"=>"Ʊ","ʈ"=>"Ʈ","ʃ"=>"Ʃ","ʀ"=>"Ʀ","ɵ"=>"Ɵ","ɲ"=>"Ɲ","ɯ"=>"Ɯ","ɩ"=>"Ɩ","ɨ"=>"Ɨ", 1015 "ɣ"=>"Ɣ","ɛ"=>"Ɛ","ə"=>"Ə","ɗ"=>"Ɗ","ɖ"=>"Ɖ","ɔ"=>"Ɔ","ɓ"=>"Ɓ","ȳ"=>"Ȳ","ȱ"=>"Ȱ","ȯ"=>"Ȯ", 1016 "ȭ"=>"Ȭ","ȫ"=>"Ȫ","ȩ"=>"Ȩ","ȧ"=>"Ȧ","ȥ"=>"Ȥ","ȣ"=>"Ȣ","ȟ"=>"Ȟ","ȝ"=>"Ȝ","ț"=>"Ț","ș"=>"Ș", 1017 "ȗ"=>"Ȗ","ȕ"=>"Ȕ","ȓ"=>"Ȓ","ȑ"=>"Ȑ","ȏ"=>"Ȏ","ȍ"=>"Ȍ","ȋ"=>"Ȋ","ȉ"=>"Ȉ","ȇ"=>"Ȇ","ȅ"=>"Ȅ", 1018 "ȃ"=>"Ȃ","ȁ"=>"Ȁ","ǿ"=>"Ǿ","ǽ"=>"Ǽ","ǻ"=>"Ǻ","ǹ"=>"Ǹ","ǵ"=>"Ǵ","dz"=>"Dz","ǯ"=>"Ǯ","ǭ"=>"Ǭ", 1019 "ǫ"=>"Ǫ","ǩ"=>"Ǩ","ǧ"=>"Ǧ","ǥ"=>"Ǥ","ǣ"=>"Ǣ","ǡ"=>"Ǡ","ǟ"=>"Ǟ","ǝ"=>"Ǝ","ǜ"=>"Ǜ","ǚ"=>"Ǚ", 1020 "ǘ"=>"Ǘ","ǖ"=>"Ǖ","ǔ"=>"Ǔ","ǒ"=>"Ǒ","ǐ"=>"Ǐ","ǎ"=>"Ǎ","nj"=>"Nj","lj"=>"Lj","dž"=>"Dž","ƿ"=>"Ƿ", 1021 "ƽ"=>"Ƽ","ƹ"=>"Ƹ","ƶ"=>"Ƶ","ƴ"=>"Ƴ","ư"=>"Ư","ƭ"=>"Ƭ","ƨ"=>"Ƨ","ƥ"=>"Ƥ","ƣ"=>"Ƣ","ơ"=>"Ơ", 1022 "ƞ"=>"Ƞ","ƙ"=>"Ƙ","ƕ"=>"Ƕ","ƒ"=>"Ƒ","ƌ"=>"Ƌ","ƈ"=>"Ƈ","ƅ"=>"Ƅ","ƃ"=>"Ƃ","ſ"=>"S","ž"=>"Ž", 1023 "ż"=>"Ż","ź"=>"Ź","ŷ"=>"Ŷ","ŵ"=>"Ŵ","ų"=>"Ų","ű"=>"Ű","ů"=>"Ů","ŭ"=>"Ŭ","ū"=>"Ū","ũ"=>"Ũ", 1024 "ŧ"=>"Ŧ","ť"=>"Ť","ţ"=>"Ţ","š"=>"Š","ş"=>"Ş","ŝ"=>"Ŝ","ś"=>"Ś","ř"=>"Ř","ŗ"=>"Ŗ","ŕ"=>"Ŕ", 1025 "œ"=>"Œ","ő"=>"Ő","ŏ"=>"Ŏ","ō"=>"Ō","ŋ"=>"Ŋ","ň"=>"Ň","ņ"=>"Ņ","ń"=>"Ń","ł"=>"Ł","ŀ"=>"Ŀ", 1026 "ľ"=>"Ľ","ļ"=>"Ļ","ĺ"=>"Ĺ","ķ"=>"Ķ","ĵ"=>"Ĵ","ij"=>"IJ","ı"=>"I","į"=>"Į","ĭ"=>"Ĭ","ī"=>"Ī", 1027 "ĩ"=>"Ĩ","ħ"=>"Ħ","ĥ"=>"Ĥ","ģ"=>"Ģ","ġ"=>"Ġ","ğ"=>"Ğ","ĝ"=>"Ĝ","ě"=>"Ě","ę"=>"Ę","ė"=>"Ė", 1028 "ĕ"=>"Ĕ","ē"=>"Ē","đ"=>"Đ","ď"=>"Ď","č"=>"Č","ċ"=>"Ċ","ĉ"=>"Ĉ","ć"=>"Ć","ą"=>"Ą","ă"=>"Ă", 1029 "ā"=>"Ā","ÿ"=>"Ÿ","þ"=>"Þ","ý"=>"Ý","ü"=>"Ü","û"=>"Û","ú"=>"Ú","ù"=>"Ù","ø"=>"Ø","ö"=>"Ö", 1030 "õ"=>"Õ","ô"=>"Ô","ó"=>"Ó","ò"=>"Ò","ñ"=>"Ñ","ð"=>"Ð","ï"=>"Ï","î"=>"Î","í"=>"Í","ì"=>"Ì", 1031 "ë"=>"Ë","ê"=>"Ê","é"=>"É","è"=>"È","ç"=>"Ç","æ"=>"Æ","å"=>"Å","ä"=>"Ä","ã"=>"Ã","â"=>"Â", 1032 "á"=>"Á","à"=>"À","µ"=>"Μ","z"=>"Z","y"=>"Y","x"=>"X","w"=>"W","v"=>"V","u"=>"U","t"=>"T", 1033 "s"=>"S","r"=>"R","q"=>"Q","p"=>"P","o"=>"O","n"=>"N","m"=>"M","l"=>"L","k"=>"K","j"=>"J", 1034 "i"=>"I","h"=>"H","g"=>"G","f"=>"F","e"=>"E","d"=>"D","c"=>"C","b"=>"B","a"=>"A" 1035 ); 1036 1037 /** 1038 * UTF-8 Case lookup table 1039 * 1040 * This lookuptable defines the lower case letters to their correspponding 1041 * upper case letter in UTF-8 1042 * 1043 * @author Andreas Gohr <andi@splitbrain.org> 1044 */ 1045 global $UTF8_UPPER_TO_LOWER; 1046 if(empty($UTF8_UPPER_TO_LOWER)) $UTF8_UPPER_TO_LOWER = array ( 1047 "Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t","S"=>"s","R"=>"r","Q"=>"q", 1048 "P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j","I"=>"i","H"=>"h","G"=>"g", 1049 "F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a","ῼ"=>"ῳ","Ῥ"=>"ῥ","Ῡ"=>"ῡ","Ῑ"=>"ῑ", 1050 "Ῐ"=>"ῐ","ῌ"=>"ῃ","Ι"=>"ι","ᾼ"=>"ᾳ","Ᾱ"=>"ᾱ","Ᾰ"=>"ᾰ","ᾯ"=>"ᾧ","ᾮ"=>"ᾦ","ᾭ"=>"ᾥ","ᾬ"=>"ᾤ", 1051 "ᾫ"=>"ᾣ","ᾪ"=>"ᾢ","ᾩ"=>"ᾡ","ᾟ"=>"ᾗ","ᾞ"=>"ᾖ","ᾝ"=>"ᾕ","ᾜ"=>"ᾔ","ᾛ"=>"ᾓ","ᾚ"=>"ᾒ","ᾙ"=>"ᾑ", 1052 "ᾘ"=>"ᾐ","ᾏ"=>"ᾇ","ᾎ"=>"ᾆ","ᾍ"=>"ᾅ","ᾌ"=>"ᾄ","ᾋ"=>"ᾃ","ᾊ"=>"ᾂ","ᾉ"=>"ᾁ","ᾈ"=>"ᾀ","Ώ"=>"ώ", 1053 "Ὼ"=>"ὼ","Ύ"=>"ύ","Ὺ"=>"ὺ","Ό"=>"ό","Ὸ"=>"ὸ","Ί"=>"ί","Ὶ"=>"ὶ","Ή"=>"ή","Ὴ"=>"ὴ","Έ"=>"έ", 1054 "Ὲ"=>"ὲ","Ά"=>"ά","Ὰ"=>"ὰ","Ὧ"=>"ὧ","Ὦ"=>"ὦ","Ὥ"=>"ὥ","Ὤ"=>"ὤ","Ὣ"=>"ὣ","Ὢ"=>"ὢ","Ὡ"=>"ὡ", 1055 "Ὗ"=>"ὗ","Ὕ"=>"ὕ","Ὓ"=>"ὓ","Ὑ"=>"ὑ","Ὅ"=>"ὅ","Ὄ"=>"ὄ","Ὃ"=>"ὃ","Ὂ"=>"ὂ","Ὁ"=>"ὁ","Ὀ"=>"ὀ", 1056 "Ἷ"=>"ἷ","Ἶ"=>"ἶ","Ἵ"=>"ἵ","Ἴ"=>"ἴ","Ἳ"=>"ἳ","Ἲ"=>"ἲ","Ἱ"=>"ἱ","Ἰ"=>"ἰ","Ἧ"=>"ἧ","Ἦ"=>"ἦ", 1057 "Ἥ"=>"ἥ","Ἤ"=>"ἤ","Ἣ"=>"ἣ","Ἢ"=>"ἢ","Ἡ"=>"ἡ","Ἕ"=>"ἕ","Ἔ"=>"ἔ","Ἓ"=>"ἓ","Ἒ"=>"ἒ","Ἑ"=>"ἑ", 1058 "Ἐ"=>"ἐ","Ἇ"=>"ἇ","Ἆ"=>"ἆ","Ἅ"=>"ἅ","Ἄ"=>"ἄ","Ἃ"=>"ἃ","Ἂ"=>"ἂ","Ἁ"=>"ἁ","Ἀ"=>"ἀ","Ỹ"=>"ỹ", 1059 "Ỷ"=>"ỷ","Ỵ"=>"ỵ","Ỳ"=>"ỳ","Ự"=>"ự","Ữ"=>"ữ","Ử"=>"ử","Ừ"=>"ừ","Ứ"=>"ứ","Ủ"=>"ủ","Ụ"=>"ụ", 1060 "Ợ"=>"ợ","Ỡ"=>"ỡ","Ở"=>"ở","Ờ"=>"ờ","Ớ"=>"ớ","Ộ"=>"ộ","Ỗ"=>"ỗ","Ổ"=>"ổ","Ồ"=>"ồ","Ố"=>"ố", 1061 "Ỏ"=>"ỏ","Ọ"=>"ọ","Ị"=>"ị","Ỉ"=>"ỉ","Ệ"=>"ệ","Ễ"=>"ễ","Ể"=>"ể","Ề"=>"ề","Ế"=>"ế","Ẽ"=>"ẽ", 1062 "Ẻ"=>"ẻ","Ẹ"=>"ẹ","Ặ"=>"ặ","Ẵ"=>"ẵ","Ẳ"=>"ẳ","Ằ"=>"ằ","Ắ"=>"ắ","Ậ"=>"ậ","Ẫ"=>"ẫ","Ẩ"=>"ẩ", 1063 "Ầ"=>"ầ","Ấ"=>"ấ","Ả"=>"ả","Ạ"=>"ạ","Ṡ"=>"ẛ","Ẕ"=>"ẕ","Ẓ"=>"ẓ","Ẑ"=>"ẑ","Ẏ"=>"ẏ","Ẍ"=>"ẍ", 1064 "Ẋ"=>"ẋ","Ẉ"=>"ẉ","Ẇ"=>"ẇ","Ẅ"=>"ẅ","Ẃ"=>"ẃ","Ẁ"=>"ẁ","Ṿ"=>"ṿ","Ṽ"=>"ṽ","Ṻ"=>"ṻ","Ṹ"=>"ṹ", 1065 "Ṷ"=>"ṷ","Ṵ"=>"ṵ","Ṳ"=>"ṳ","Ṱ"=>"ṱ","Ṯ"=>"ṯ","Ṭ"=>"ṭ","Ṫ"=>"ṫ","Ṩ"=>"ṩ","Ṧ"=>"ṧ","Ṥ"=>"ṥ", 1066 "Ṣ"=>"ṣ","Ṡ"=>"ṡ","Ṟ"=>"ṟ","Ṝ"=>"ṝ","Ṛ"=>"ṛ","Ṙ"=>"ṙ","Ṗ"=>"ṗ","Ṕ"=>"ṕ","Ṓ"=>"ṓ","Ṑ"=>"ṑ", 1067 "Ṏ"=>"ṏ","Ṍ"=>"ṍ","Ṋ"=>"ṋ","Ṉ"=>"ṉ","Ṇ"=>"ṇ","Ṅ"=>"ṅ","Ṃ"=>"ṃ","Ṁ"=>"ṁ","Ḿ"=>"ḿ","Ḽ"=>"ḽ", 1068 "Ḻ"=>"ḻ","Ḹ"=>"ḹ","Ḷ"=>"ḷ","Ḵ"=>"ḵ","Ḳ"=>"ḳ","Ḱ"=>"ḱ","Ḯ"=>"ḯ","Ḭ"=>"ḭ","Ḫ"=>"ḫ","Ḩ"=>"ḩ", 1069 "Ḧ"=>"ḧ","Ḥ"=>"ḥ","Ḣ"=>"ḣ","Ḡ"=>"ḡ","Ḟ"=>"ḟ","Ḝ"=>"ḝ","Ḛ"=>"ḛ","Ḙ"=>"ḙ","Ḗ"=>"ḗ","Ḕ"=>"ḕ", 1070 "Ḓ"=>"ḓ","Ḑ"=>"ḑ","Ḏ"=>"ḏ","Ḍ"=>"ḍ","Ḋ"=>"ḋ","Ḉ"=>"ḉ","Ḇ"=>"ḇ","Ḅ"=>"ḅ","Ḃ"=>"ḃ","Ḁ"=>"ḁ", 1071 "Ֆ"=>"ֆ","Օ"=>"օ","Ք"=>"ք","Փ"=>"փ","Ւ"=>"ւ","Ց"=>"ց","Ր"=>"ր","Տ"=>"տ","Վ"=>"վ","Ս"=>"ս", 1072 "Ռ"=>"ռ","Ջ"=>"ջ","Պ"=>"պ","Չ"=>"չ","Ո"=>"ո","Շ"=>"շ","Ն"=>"ն","Յ"=>"յ","Մ"=>"մ","Ճ"=>"ճ", 1073 "Ղ"=>"ղ","Ձ"=>"ձ","Հ"=>"հ","Կ"=>"կ","Ծ"=>"ծ","Խ"=>"խ","Լ"=>"լ","Ի"=>"ի","Ժ"=>"ժ","Թ"=>"թ", 1074 "Ը"=>"ը","Է"=>"է","Զ"=>"զ","Ե"=>"ե","Դ"=>"դ","Գ"=>"գ","Բ"=>"բ","Ա"=>"ա","Ԏ"=>"ԏ","Ԍ"=>"ԍ", 1075 "Ԋ"=>"ԋ","Ԉ"=>"ԉ","Ԇ"=>"ԇ","Ԅ"=>"ԅ","Ԃ"=>"ԃ","Ԁ"=>"ԁ","Ӹ"=>"ӹ","Ӵ"=>"ӵ","Ӳ"=>"ӳ","Ӱ"=>"ӱ", 1076 "Ӯ"=>"ӯ","Ӭ"=>"ӭ","Ӫ"=>"ӫ","Ө"=>"ө","Ӧ"=>"ӧ","Ӥ"=>"ӥ","Ӣ"=>"ӣ","Ӡ"=>"ӡ","Ӟ"=>"ӟ","Ӝ"=>"ӝ", 1077 "Ӛ"=>"ӛ","Ә"=>"ә","Ӗ"=>"ӗ","Ӕ"=>"ӕ","Ӓ"=>"ӓ","Ӑ"=>"ӑ","Ӎ"=>"ӎ","Ӌ"=>"ӌ","Ӊ"=>"ӊ","Ӈ"=>"ӈ", 1078 "Ӆ"=>"ӆ","Ӄ"=>"ӄ","Ӂ"=>"ӂ","Ҿ"=>"ҿ","Ҽ"=>"ҽ","Һ"=>"һ","Ҹ"=>"ҹ","Ҷ"=>"ҷ","Ҵ"=>"ҵ","Ҳ"=>"ҳ", 1079 "Ұ"=>"ұ","Ү"=>"ү","Ҭ"=>"ҭ","Ҫ"=>"ҫ","Ҩ"=>"ҩ","Ҧ"=>"ҧ","Ҥ"=>"ҥ","Ң"=>"ң","Ҡ"=>"ҡ","Ҟ"=>"ҟ", 1080 "Ҝ"=>"ҝ","Қ"=>"қ","Ҙ"=>"ҙ","Җ"=>"җ","Ҕ"=>"ҕ","Ғ"=>"ғ","Ґ"=>"ґ","Ҏ"=>"ҏ","Ҍ"=>"ҍ","Ҋ"=>"ҋ", 1081 "Ҁ"=>"ҁ","Ѿ"=>"ѿ","Ѽ"=>"ѽ","Ѻ"=>"ѻ","Ѹ"=>"ѹ","Ѷ"=>"ѷ","Ѵ"=>"ѵ","Ѳ"=>"ѳ","Ѱ"=>"ѱ","Ѯ"=>"ѯ", 1082 "Ѭ"=>"ѭ","Ѫ"=>"ѫ","Ѩ"=>"ѩ","Ѧ"=>"ѧ","Ѥ"=>"ѥ","Ѣ"=>"ѣ","Ѡ"=>"ѡ","Џ"=>"џ","Ў"=>"ў","Ѝ"=>"ѝ", 1083 "Ќ"=>"ќ","Ћ"=>"ћ","Њ"=>"њ","Љ"=>"љ","Ј"=>"ј","Ї"=>"ї","І"=>"і","Ѕ"=>"ѕ","Є"=>"є","Ѓ"=>"ѓ", 1084 "Ђ"=>"ђ","Ё"=>"ё","Ѐ"=>"ѐ","Я"=>"я","Ю"=>"ю","Э"=>"э","Ь"=>"ь","Ы"=>"ы","Ъ"=>"ъ","Щ"=>"щ", 1085 "Ш"=>"ш","Ч"=>"ч","Ц"=>"ц","Х"=>"х","Ф"=>"ф","У"=>"у","Т"=>"т","С"=>"с","Р"=>"р","П"=>"п", 1086 "О"=>"о","Н"=>"н","М"=>"м","Л"=>"л","К"=>"к","Й"=>"й","И"=>"и","З"=>"з","Ж"=>"ж","Е"=>"е", 1087 "Д"=>"д","Г"=>"г","В"=>"в","Б"=>"б","А"=>"а","Ε"=>"ϵ","Σ"=>"ϲ","Ρ"=>"ϱ","Κ"=>"ϰ","Ϯ"=>"ϯ", 1088 "Ϭ"=>"ϭ","Ϫ"=>"ϫ","Ϩ"=>"ϩ","Ϧ"=>"ϧ","Ϥ"=>"ϥ","Ϣ"=>"ϣ","Ϡ"=>"ϡ","Ϟ"=>"ϟ","Ϝ"=>"ϝ","Ϛ"=>"ϛ", 1089 "Ϙ"=>"ϙ","Π"=>"ϖ","Φ"=>"ϕ","Θ"=>"ϑ","Β"=>"ϐ","Ώ"=>"ώ","Ύ"=>"ύ","Ό"=>"ό","Ϋ"=>"ϋ","Ϊ"=>"ϊ", 1090 "Ω"=>"ω","Ψ"=>"ψ","Χ"=>"χ","Φ"=>"φ","Υ"=>"υ","Τ"=>"τ","Σ"=>"σ","Σ"=>"ς","Ρ"=>"ρ","Π"=>"π", 1091 "Ο"=>"ο","Ξ"=>"ξ","Ν"=>"ν","Μ"=>"μ","Λ"=>"λ","Κ"=>"κ","Ι"=>"ι","Θ"=>"θ","Η"=>"η","Ζ"=>"ζ", 1092 "Ε"=>"ε","Δ"=>"δ","Γ"=>"γ","Β"=>"β","Α"=>"α","Ί"=>"ί","Ή"=>"ή","Έ"=>"έ","Ά"=>"ά","Ʒ"=>"ʒ", 1093 "Ʋ"=>"ʋ","Ʊ"=>"ʊ","Ʈ"=>"ʈ","Ʃ"=>"ʃ","Ʀ"=>"ʀ","Ɵ"=>"ɵ","Ɲ"=>"ɲ","Ɯ"=>"ɯ","Ɩ"=>"ɩ","Ɨ"=>"ɨ", 1094 "Ɣ"=>"ɣ","Ɛ"=>"ɛ","Ə"=>"ə","Ɗ"=>"ɗ","Ɖ"=>"ɖ","Ɔ"=>"ɔ","Ɓ"=>"ɓ","Ȳ"=>"ȳ","Ȱ"=>"ȱ","Ȯ"=>"ȯ", 1095 "Ȭ"=>"ȭ","Ȫ"=>"ȫ","Ȩ"=>"ȩ","Ȧ"=>"ȧ","Ȥ"=>"ȥ","Ȣ"=>"ȣ","Ȟ"=>"ȟ","Ȝ"=>"ȝ","Ț"=>"ț","Ș"=>"ș", 1096 "Ȗ"=>"ȗ","Ȕ"=>"ȕ","Ȓ"=>"ȓ","Ȑ"=>"ȑ","Ȏ"=>"ȏ","Ȍ"=>"ȍ","Ȋ"=>"ȋ","Ȉ"=>"ȉ","Ȇ"=>"ȇ","Ȅ"=>"ȅ", 1097 "Ȃ"=>"ȃ","Ȁ"=>"ȁ","Ǿ"=>"ǿ","Ǽ"=>"ǽ","Ǻ"=>"ǻ","Ǹ"=>"ǹ","Ǵ"=>"ǵ","Dz"=>"dz","Ǯ"=>"ǯ","Ǭ"=>"ǭ", 1098 "Ǫ"=>"ǫ","Ǩ"=>"ǩ","Ǧ"=>"ǧ","Ǥ"=>"ǥ","Ǣ"=>"ǣ","Ǡ"=>"ǡ","Ǟ"=>"ǟ","Ǝ"=>"ǝ","Ǜ"=>"ǜ","Ǚ"=>"ǚ", 1099 "Ǘ"=>"ǘ","Ǖ"=>"ǖ","Ǔ"=>"ǔ","Ǒ"=>"ǒ","Ǐ"=>"ǐ","Ǎ"=>"ǎ","Nj"=>"nj","Lj"=>"lj","Dž"=>"dž","Ƿ"=>"ƿ", 1100 "Ƽ"=>"ƽ","Ƹ"=>"ƹ","Ƶ"=>"ƶ","Ƴ"=>"ƴ","Ư"=>"ư","Ƭ"=>"ƭ","Ƨ"=>"ƨ","Ƥ"=>"ƥ","Ƣ"=>"ƣ","Ơ"=>"ơ", 1101 "Ƞ"=>"ƞ","Ƙ"=>"ƙ","Ƕ"=>"ƕ","Ƒ"=>"ƒ","Ƌ"=>"ƌ","Ƈ"=>"ƈ","Ƅ"=>"ƅ","Ƃ"=>"ƃ","S"=>"ſ","Ž"=>"ž", 1102 "Ż"=>"ż","Ź"=>"ź","Ŷ"=>"ŷ","Ŵ"=>"ŵ","Ų"=>"ų","Ű"=>"ű","Ů"=>"ů","Ŭ"=>"ŭ","Ū"=>"ū","Ũ"=>"ũ", 1103 "Ŧ"=>"ŧ","Ť"=>"ť","Ţ"=>"ţ","Š"=>"š","Ş"=>"ş","Ŝ"=>"ŝ","Ś"=>"ś","Ř"=>"ř","Ŗ"=>"ŗ","Ŕ"=>"ŕ", 1104 "Œ"=>"œ","Ő"=>"ő","Ŏ"=>"ŏ","Ō"=>"ō","Ŋ"=>"ŋ","Ň"=>"ň","Ņ"=>"ņ","Ń"=>"ń","Ł"=>"ł","Ŀ"=>"ŀ", 1105 "Ľ"=>"ľ","Ļ"=>"ļ","Ĺ"=>"ĺ","Ķ"=>"ķ","Ĵ"=>"ĵ","IJ"=>"ij","I"=>"ı","Į"=>"į","Ĭ"=>"ĭ","Ī"=>"ī", 1106 "Ĩ"=>"ĩ","Ħ"=>"ħ","Ĥ"=>"ĥ","Ģ"=>"ģ","Ġ"=>"ġ","Ğ"=>"ğ","Ĝ"=>"ĝ","Ě"=>"ě","Ę"=>"ę","Ė"=>"ė", 1107 "Ĕ"=>"ĕ","Ē"=>"ē","Đ"=>"đ","Ď"=>"ď","Č"=>"č","Ċ"=>"ċ","Ĉ"=>"ĉ","Ć"=>"ć","Ą"=>"ą","Ă"=>"ă", 1108 "Ā"=>"ā","Ÿ"=>"ÿ","Þ"=>"þ","Ý"=>"ý","Ü"=>"ü","Û"=>"û","Ú"=>"ú","Ù"=>"ù","Ø"=>"ø","Ö"=>"ö", 1109 "Õ"=>"õ","Ô"=>"ô","Ó"=>"ó","Ò"=>"ò","Ñ"=>"ñ","Ð"=>"ð","Ï"=>"ï","Î"=>"î","Í"=>"í","Ì"=>"ì", 1110 "Ë"=>"ë","Ê"=>"ê","É"=>"é","È"=>"è","Ç"=>"ç","Æ"=>"æ","Å"=>"å","Ä"=>"ä","Ã"=>"ã","Â"=>"â", 1111 "Á"=>"á","À"=>"à","Μ"=>"µ","Z"=>"z","Y"=>"y","X"=>"x","W"=>"w","V"=>"v","U"=>"u","T"=>"t", 1112 "S"=>"s","R"=>"r","Q"=>"q","P"=>"p","O"=>"o","N"=>"n","M"=>"m","L"=>"l","K"=>"k","J"=>"j", 1113 "I"=>"i","H"=>"h","G"=>"g","F"=>"f","E"=>"e","D"=>"d","C"=>"c","B"=>"b","A"=>"a" 1114 ); 1115}; // end of case lookup tables 1116 1117/** 1118 * UTF-8 lookup table for lower case accented letters 1119 * 1120 * This lookuptable defines replacements for accented characters from the ASCII-7 1121 * range. This are lower case letters only. 1122 * 1123 * @author Andreas Gohr <andi@splitbrain.org> 1124 * @see utf8_deaccent() 1125 */ 1126global $UTF8_LOWER_ACCENTS; 1127if(empty($UTF8_LOWER_ACCENTS)) $UTF8_LOWER_ACCENTS = array( 1128 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', 1129 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', 1130 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', 1131 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', 1132 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', 1133 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', 1134 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', 1135 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', 1136 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', 1137 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', 1138 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', 1139 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', 1140 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', 1141 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', 1142 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', 1143); 1144 1145/** 1146 * UTF-8 lookup table for upper case accented letters 1147 * 1148 * This lookuptable defines replacements for accented characters from the ASCII-7 1149 * range. This are upper case letters only. 1150 * 1151 * @author Andreas Gohr <andi@splitbrain.org> 1152 * @see utf8_deaccent() 1153 */ 1154global $UTF8_UPPER_ACCENTS; 1155if(empty($UTF8_UPPER_ACCENTS)) $UTF8_UPPER_ACCENTS = array( 1156 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', 1157 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', 1158 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', 1159 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', 1160 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', 1161 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', 1162 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', 1163 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', 1164 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', 1165 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', 1166 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', 1167 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', 1168 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', 1169 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', 1170 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', 1171); 1172 1173/** 1174 * UTF-8 array of common special characters 1175 * 1176 * This array should contain all special characters (not a letter or digit) 1177 * defined in the various local charsets - it's not a complete list of non-alphanum 1178 * characters in UTF-8. It's not perfect but should match most cases of special 1179 * chars. 1180 * 1181 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! 1182 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a 1183 * 1184 * @author Andreas Gohr <andi@splitbrain.org> 1185 * @see utf8_stripspecials() 1186 */ 1187global $UTF8_SPECIAL_CHARS; 1188if(empty($UTF8_SPECIAL_CHARS)) $UTF8_SPECIAL_CHARS = array( 1189 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 1190 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 1191 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 1192 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 1193 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 1194 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 1195 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 1196 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 1197 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 1198 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 1199 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 1200 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 1201 0x0385, 0x0387, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 1202 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 1203 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1204 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1205 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1206 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1207 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1208 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1209 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202, 1210 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1211 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1212 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1213 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1214 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1215 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1216 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1217 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1218 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1219 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1220 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1221 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1222 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1223 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1224 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1225 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1226 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1227 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1228 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1229 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1230 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1231 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1232 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1233 0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1234 0x27be, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x300c, 1235 0x300d, 0x300e, 0x300f, 0x3010, 0x3011, 0x3012, 0x3014, 0x3015, 0x3016, 0x3017, 1236 0x3018, 0x3019, 0x301a, 0x301b, 0x3036, 1237 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1238 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1239 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1240 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1241 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1242 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 1243 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f, 0xff1a, 0xff1b, 0xff1c, 1244 0xff1d, 0xff1e, 0xff1f, 0xff20, 0xff3b, 0xff3c, 0xff3d, 0xff3e, 0xff40, 0xff5b, 1245 0xff5c, 0xff5d, 0xff5e, 0xff5f, 0xff60, 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 1246 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6, 0xffe8, 0xffe9, 0xffea, 1247 0xffeb, 0xffec, 0xffed, 0xffee, 1248 0x01d6fc, 0x01d6fd, 0x01d6fe, 0x01d6ff, 0x01d700, 0x01d701, 0x01d702, 0x01d703, 1249 0x01d704, 0x01d705, 0x01d706, 0x01d707, 0x01d708, 0x01d709, 0x01d70a, 0x01d70b, 1250 0x01d70c, 0x01d70d, 0x01d70e, 0x01d70f, 0x01d710, 0x01d711, 0x01d712, 0x01d713, 1251 0x01d714, 0x01d715, 0x01d716, 0x01d717, 0x01d718, 0x01d719, 0x01d71a, 0x01d71b, 1252 0xc2a0, 0xe28087, 0xe280af, 0xe281a0, 0xefbbbf, 1253); 1254 1255// utf8 version of above data 1256global $UTF8_SPECIAL_CHARS2; 1257if(empty($UTF8_SPECIAL_CHARS2)) $UTF8_SPECIAL_CHARS2 = 1258 "\x1A".' !"#$%&\'()+,/;<=>?@[\]^`{|}~ �'. 1259 '� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½�'. 1260 '�¿×÷ˇ˘˙˚˛˜˝̣̀́̃̉΄΅·ϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1261 '�ׁׂ׃׳״،؛؟ـًٌٍَُِّْ٪฿–—―‗‘’‚“”�'. 1262 '��†‡•…‰′″‹›⁄₧₪₫€№℘™Ωℵ←↑→↓↔↕↵'. 1263 '⇐⇑⇒⇓⇔∀∂∃∅∆∇∈∉∋∏∑−∕∗∙√∝∞∠∧∨�'. 1264 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌐⌠⌡〈〉⑩─�'. 1265 '��┌┐└┘├┤┬┴┼═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠'. 1266 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌▐░▒▓■▲▼◆◊●�'. 1267 '�★☎☛☞♠♣♥♦✁✂✃✄✆✇✈✉✌✍✎✏✐✑✒✓✔✕�'. 1268 '��✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✩✪✫✬✭✮✯✰✱'. 1269 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋�'. 1270 '�❏❐❑❒❖❘❙❚❛❜❝❞❡❢❣❤❥❦❧❿➉➓➔➘➙➚�'. 1271 '��➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶'. 1272 '➷➸➹➺➻➼➽➾'. 1273 ' 、。〃〈〉《》「」『』【】〒〔〕〖〗〘〙〚〛〶'. 1274 '�'. 1275 '�ﹼﹽ'. 1276 '!"#$%&'()*+,-./:;<=>?@[\]^`{|}~'. 1277 '⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○'. 1278 ''. 1279 ' '; 1280 1281/** 1282 * Romanization lookup table 1283 * 1284 * This lookup tables provides a way to transform strings written in a language 1285 * different from the ones based upon latin letters into plain ASCII. 1286 * 1287 * Please note: this is not a scientific transliteration table. It only works 1288 * oneway from nonlatin to ASCII and it works by simple character replacement 1289 * only. Specialities of each language are not supported. 1290 * 1291 * @author Andreas Gohr <andi@splitbrain.org> 1292 * @author Vitaly Blokhin <vitinfo@vitn.com> 1293 * @link http://www.uconv.com/translit.htm 1294 * @author Bisqwit <bisqwit@iki.fi> 1295 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1296 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1297 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1298 * @link http://www.btranslations.com/resources/romanization/korean.asp 1299 * @author Arthit Suriyawongkul <arthit@gmail.com> 1300 * @author Denis Scheither <amorphis@uni-bremen.de> 1301 */ 1302global $UTF8_ROMANIZATION; 1303if(empty($UTF8_ROMANIZATION)) $UTF8_ROMANIZATION = array( 1304 // scandinavian - differs from what we do in deaccent 1305 'å'=>'a','Å'=>'A','ä'=>'a','Ä'=>'A','ö'=>'o','Ö'=>'O', 1306 1307 //russian cyrillic 1308 'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G', 1309 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1310 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1311 'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O', 1312 'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T', 1313 'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C', 1314 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ъ'=>'', 1315 'Ъ'=>'','ы'=>'y','Ы'=>'Y','ь'=>'','Ь'=>'','э'=>'eh','Э'=>'Eh','ю'=>'ju', 1316 'Ю'=>'Ju','я'=>'ja','Я'=>'Ja', 1317 // Ukrainian cyrillic 1318 'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji', 1319 // Georgian 1320 'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1321 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh', 1322 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1323 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh', 1324 'ჰ'=>'xh', 1325 //Sanskrit 1326 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1327 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw', 1328 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1329 'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1330 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1331 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1332 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1333 //Hebrew 1334 'א'=>'a', 'ב'=>'b','ג'=>'g','ד'=>'d','ה'=>'h','ו'=>'v','ז'=>'z','ח'=>'kh','ט'=>'th', 1335 'י'=>'y','ך'=>'h','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n', 1336 'ס'=>'s','ע'=>'ah','ף'=>'f','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r', 1337 'ש'=>'sh','ת'=>'t', 1338 //Arabic 1339 'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d', 1340 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'', 1341 'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k', 1342 'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i', 1343 1344 // Japanese characters (last update: 2008-05-09) 1345 1346 // Japanese hiragana 1347 1348 // 3 character syllables, っ doubles the consonant after 1349 'っちゃ'=>'ccha','っちぇ'=>'cche','っちょ'=>'ccho','っちゅ'=>'cchu', 1350 'っびゃ'=>'bbya','っびぇ'=>'bbye','っびぃ'=>'bbyi','っびょ'=>'bbyo','っびゅ'=>'bbyu', 1351 'っぴゃ'=>'ppya','っぴぇ'=>'ppye','っぴぃ'=>'ppyi','っぴょ'=>'ppyo','っぴゅ'=>'ppyu', 1352 'っちゃ'=>'ccha','っちぇ'=>'cche','っち'=>'cchi','っちょ'=>'ccho','っちゅ'=>'cchu', 1353 // 'っひゃ'=>'hya','っひぇ'=>'hye','っひぃ'=>'hyi','っひょ'=>'hyo','っひゅ'=>'hyu', 1354 'っきゃ'=>'kkya','っきぇ'=>'kkye','っきぃ'=>'kkyi','っきょ'=>'kkyo','っきゅ'=>'kkyu', 1355 'っぎゃ'=>'ggya','っぎぇ'=>'ggye','っぎぃ'=>'ggyi','っぎょ'=>'ggyo','っぎゅ'=>'ggyu', 1356 'っみゃ'=>'mmya','っみぇ'=>'mmye','っみぃ'=>'mmyi','っみょ'=>'mmyo','っみゅ'=>'mmyu', 1357 'っにゃ'=>'nnya','っにぇ'=>'nnye','っにぃ'=>'nnyi','っにょ'=>'nnyo','っにゅ'=>'nnyu', 1358 'っりゃ'=>'rrya','っりぇ'=>'rrye','っりぃ'=>'rryi','っりょ'=>'rryo','っりゅ'=>'rryu', 1359 'っしゃ'=>'ssha','っしぇ'=>'sshe','っし'=>'sshi','っしょ'=>'ssho','っしゅ'=>'sshu', 1360 1361 // seperate hiragana 'n' ('n' + 'i' != 'ni', normally we would write "kon'nichi wa" but the apostrophe would be converted to _ anyway) 1362 'んあ'=>'n_a','んえ'=>'n_e','んい'=>'n_i','んお'=>'n_o','んう'=>'n_u', 1363 'んや'=>'n_ya','んよ'=>'n_yo','んゆ'=>'n_yu', 1364 1365 // 2 character syllables - normal 1366 'ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo', 1367 'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu', 1368 'ひゃ'=>'hya','ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu', 1369 'びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu', 1370 'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu', 1371 'きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi','きょ'=>'kyo','きゅ'=>'kyu', 1372 'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu', 1373 'みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu', 1374 'にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu', 1375 'りゃ'=>'rya','りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu', 1376 'しゃ'=>'sha','しぇ'=>'she','し'=>'shi','しょ'=>'sho','しゅ'=>'shu', 1377 'じゃ'=>'ja','じぇ'=>'je','じょ'=>'jo','じゅ'=>'ju', 1378 'うぇ'=>'we','うぃ'=>'wi', 1379 'いぇ'=>'ye', 1380 1381 // 2 character syllables, っ doubles the consonant after 1382 'っば'=>'bba','っべ'=>'bbe','っび'=>'bbi','っぼ'=>'bbo','っぶ'=>'bbu', 1383 'っぱ'=>'ppa','っぺ'=>'ppe','っぴ'=>'ppi','っぽ'=>'ppo','っぷ'=>'ppu', 1384 'った'=>'tta','って'=>'tte','っち'=>'cchi','っと'=>'tto','っつ'=>'ttsu', 1385 'っだ'=>'dda','っで'=>'dde','っぢ'=>'ddi','っど'=>'ddo','っづ'=>'ddu', 1386 'っが'=>'gga','っげ'=>'gge','っぎ'=>'ggi','っご'=>'ggo','っぐ'=>'ggu', 1387 'っか'=>'kka','っけ'=>'kke','っき'=>'kki','っこ'=>'kko','っく'=>'kku', 1388 'っま'=>'mma','っめ'=>'mme','っみ'=>'mmi','っも'=>'mmo','っむ'=>'mmu', 1389 'っな'=>'nna','っね'=>'nne','っに'=>'nni','っの'=>'nno','っぬ'=>'nnu', 1390 'っら'=>'rra','っれ'=>'rre','っり'=>'rri','っろ'=>'rro','っる'=>'rru', 1391 'っさ'=>'ssa','っせ'=>'sse','っし'=>'sshi','っそ'=>'sso','っす'=>'ssu', 1392 'っざ'=>'zza','っぜ'=>'zze','っじ'=>'jji','っぞ'=>'zzo','っず'=>'zzu', 1393 1394 // 1 character syllabels 1395 'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ん'=>'n', 1396 'は'=>'ha','へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'fu', 1397 'ば'=>'ba','べ'=>'be','び'=>'bi','ぼ'=>'bo','ぶ'=>'bu', 1398 'ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po','ぷ'=>'pu', 1399 'た'=>'ta','て'=>'te','ち'=>'chi','と'=>'to','つ'=>'tsu', 1400 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1401 'が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu', 1402 'か'=>'ka','け'=>'ke','き'=>'ki','こ'=>'ko','く'=>'ku', 1403 'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu', 1404 'な'=>'na','ね'=>'ne','に'=>'ni','の'=>'no','ぬ'=>'nu', 1405 'ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru', 1406 'さ'=>'sa','せ'=>'se','し'=>'shi','そ'=>'so','す'=>'su', 1407 'わ'=>'wa','を'=>'wo', 1408 'ざ'=>'za','ぜ'=>'ze','じ'=>'ji','ぞ'=>'zo','ず'=>'zu', 1409 'や'=>'ya','よ'=>'yo','ゆ'=>'yu', 1410 // old characters 1411 'ゑ'=>'we','ゐ'=>'wi', 1412 1413 // convert what's left (probably only kicks in when something's missing above) 1414 // 'ぁ'=>'a','ぇ'=>'e','ぃ'=>'i','ぉ'=>'o','ぅ'=>'u', 1415 // 'ゃ'=>'ya','ょ'=>'yo','ゅ'=>'yu', 1416 1417 // never seen one of those (disabled for the moment) 1418 // 'ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo','ヴ'=>'vu', 1419 // 'でゃ'=>'dha','でぇ'=>'dhe','でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu', 1420 // 'どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi','どぉ'=>'dwo','どぅ'=>'dwu', 1421 // 'ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo','ぢゅ'=>'dyu', 1422 // 'ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo','ふぅ'=>'fwu', 1423 // 'ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu', 1424 // 'すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi','すぉ'=>'swo','すぅ'=>'swu', 1425 // 'てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu', 1426 // 'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu', 1427 // 'とぁ'=>'twa','とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu', 1428 // 'ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi','ヴょ'=>'vyo','ヴゅ'=>'vyu', 1429 // 'うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who','うぅ'=>'whu', 1430 // 'じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi','じょ'=>'zho','じゅ'=>'zhu', 1431 // 'じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo','じゅ'=>'zyu', 1432 1433 // 'spare' characters from other romanization systems 1434 // 'だ'=>'da','で'=>'de','ぢ'=>'di','ど'=>'do','づ'=>'du', 1435 // 'ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu', 1436 // 'さ'=>'sa','せ'=>'se','し'=>'si','そ'=>'so','す'=>'su', 1437 // 'ちゃ'=>'cya','ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu', 1438 //'じゃ'=>'jya','じぇ'=>'jye','じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu', 1439 //'りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo','りゅ'=>'lyu', 1440 //'しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo','しゅ'=>'syu', 1441 //'ちゃ'=>'tya','ちぇ'=>'tye','ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu', 1442 //'し'=>'ci',,い'=>'yi','ぢ'=>'dzi', 1443 //'っじゃ'=>'jja','っじぇ'=>'jje','っじ'=>'jji','っじょ'=>'jjo','っじゅ'=>'jju', 1444 1445 1446 // Japanese katakana 1447 1448 // 4 character syllables: ッ doubles the consonant after, ー doubles the vowel before (usualy written with macron, but we don't want that in our URLs) 1449 'ッビャー'=>'bbyaa','ッビェー'=>'bbyee','ッビィー'=>'bbyii','ッビョー'=>'bbyoo','ッビュー'=>'bbyuu', 1450 'ッピャー'=>'ppyaa','ッピェー'=>'ppyee','ッピィー'=>'ppyii','ッピョー'=>'ppyoo','ッピュー'=>'ppyuu', 1451 'ッキャー'=>'kkyaa','ッキェー'=>'kkyee','ッキィー'=>'kkyii','ッキョー'=>'kkyoo','ッキュー'=>'kkyuu', 1452 'ッギャー'=>'ggyaa','ッギェー'=>'ggyee','ッギィー'=>'ggyii','ッギョー'=>'ggyoo','ッギュー'=>'ggyuu', 1453 'ッミャー'=>'mmyaa','ッミェー'=>'mmyee','ッミィー'=>'mmyii','ッミョー'=>'mmyoo','ッミュー'=>'mmyuu', 1454 'ッニャー'=>'nnyaa','ッニェー'=>'nnyee','ッニィー'=>'nnyii','ッニョー'=>'nnyoo','ッニュー'=>'nnyuu', 1455 'ッリャー'=>'rryaa','ッリェー'=>'rryee','ッリィー'=>'rryii','ッリョー'=>'rryoo','ッリュー'=>'rryuu', 1456 'ッシャー'=>'sshaa','ッシェー'=>'sshee','ッシー'=>'sshii','ッショー'=>'sshoo','ッシュー'=>'sshuu', 1457 'ッチャー'=>'cchaa','ッチェー'=>'cchee','ッチー'=>'cchii','ッチョー'=>'cchoo','ッチュー'=>'cchuu', 1458 'ッティー'=>'ttii', 1459 'ッヂィー'=>'ddii', 1460 1461 // 3 character syllables - doubled vowels 1462 'ファー'=>'faa','フェー'=>'fee','フィー'=>'fii','フォー'=>'foo', 1463 'フャー'=>'fyaa','フェー'=>'fyee','フィー'=>'fyii','フョー'=>'fyoo','フュー'=>'fyuu', 1464 'ヒャー'=>'hyaa','ヒェー'=>'hyee','ヒィー'=>'hyii','ヒョー'=>'hyoo','ヒュー'=>'hyuu', 1465 'ビャー'=>'byaa','ビェー'=>'byee','ビィー'=>'byii','ビョー'=>'byoo','ビュー'=>'byuu', 1466 'ピャー'=>'pyaa','ピェー'=>'pyee','ピィー'=>'pyii','ピョー'=>'pyoo','ピュー'=>'pyuu', 1467 'キャー'=>'kyaa','キェー'=>'kyee','キィー'=>'kyii','キョー'=>'kyoo','キュー'=>'kyuu', 1468 'ギャー'=>'gyaa','ギェー'=>'gyee','ギィー'=>'gyii','ギョー'=>'gyoo','ギュー'=>'gyuu', 1469 'ミャー'=>'myaa','ミェー'=>'myee','ミィー'=>'myii','ミョー'=>'myoo','ミュー'=>'myuu', 1470 'ニャー'=>'nyaa','ニェー'=>'nyee','ニィー'=>'nyii','ニョー'=>'nyoo','ニュー'=>'nyuu', 1471 'リャー'=>'ryaa','リェー'=>'ryee','リィー'=>'ryii','リョー'=>'ryoo','リュー'=>'ryuu', 1472 'シャー'=>'shaa','シェー'=>'shee','シー'=>'shii','ショー'=>'shoo','シュー'=>'shuu', 1473 'ジャー'=>'jaa','ジェー'=>'jee','ジー'=>'jii','ジョー'=>'joo','ジュー'=>'juu', 1474 'スァー'=>'swaa','スェー'=>'swee','スィー'=>'swii','スォー'=>'swoo','スゥー'=>'swuu', 1475 'デァー'=>'daa','デェー'=>'dee','ディー'=>'dii','デォー'=>'doo','デゥー'=>'duu', 1476 'チャー'=>'chaa','チェー'=>'chee','チー'=>'chii','チョー'=>'choo','チュー'=>'chuu', 1477 'ヂャー'=>'dyaa','ヂェー'=>'dyee','ヂィー'=>'dyii','ヂョー'=>'dyoo','ヂュー'=>'dyuu', 1478 'ツャー'=>'tsaa','ツェー'=>'tsee','ツィー'=>'tsii','ツョー'=>'tsoo','ツー'=>'tsuu', 1479 'トァー'=>'twaa','トェー'=>'twee','トィー'=>'twii','トォー'=>'twoo','トゥー'=>'twuu', 1480 'ドァー'=>'dwaa','ドェー'=>'dwee','ドィー'=>'dwii','ドォー'=>'dwoo','ドゥー'=>'dwuu', 1481 'ウァー'=>'whaa','ウェー'=>'whee','ウィー'=>'whii','ウォー'=>'whoo','ウゥー'=>'whuu', 1482 'ヴャー'=>'vyaa','ヴェー'=>'vyee','ヴィー'=>'vyii','ヴョー'=>'vyoo','ヴュー'=>'vyuu', 1483 'ヴァー'=>'vaa','ヴェー'=>'vee','ヴィー'=>'vii','ヴォー'=>'voo','ヴー'=>'vuu', 1484 'ウェー'=>'wee','ウィー'=>'wii', 1485 'イェー'=>'yee', 1486 'ティー'=>'tii', 1487 'ヂィー'=>'dii', 1488 1489 // 3 character syllables - doubled consonants 1490 'ッビャ'=>'bbya','ッビェ'=>'bbye','ッビィ'=>'bbyi','ッビョ'=>'bbyo','ッビュ'=>'bbyu', 1491 'ッピャ'=>'ppya','ッピェ'=>'ppye','ッピィ'=>'ppyi','ッピョ'=>'ppyo','ッピュ'=>'ppyu', 1492 'ッキャ'=>'kkya','ッキェ'=>'kkye','ッキィ'=>'kkyi','ッキョ'=>'kkyo','ッキュ'=>'kkyu', 1493 'ッギャ'=>'ggya','ッギェ'=>'ggye','ッギィ'=>'ggyi','ッギョ'=>'ggyo','ッギュ'=>'ggyu', 1494 'ッミャ'=>'mmya','ッミェ'=>'mmye','ッミィ'=>'mmyi','ッミョ'=>'mmyo','ッミュ'=>'mmyu', 1495 'ッニャ'=>'nnya','ッニェ'=>'nnye','ッニィ'=>'nnyi','ッニョ'=>'nnyo','ッニュ'=>'nnyu', 1496 'ッリャ'=>'rrya','ッリェ'=>'rrye','ッリィ'=>'rryi','ッリョ'=>'rryo','ッリュ'=>'rryu', 1497 'ッシャ'=>'ssha','ッシェ'=>'sshe','ッシ'=>'sshi','ッショ'=>'ssho','ッシュ'=>'sshu', 1498 'ッチャ'=>'ccha','ッチェ'=>'cche','ッチ'=>'cchi','ッチョ'=>'ccho','ッチュ'=>'cchu', 1499 'ッティ'=>'tti', 1500 'ッヂィ'=>'ddi', 1501 1502 // 3 character syllables - doubled vowel and consonants 1503 'ッバー'=>'bbaa','ッベー'=>'bbee','ッビー'=>'bbii','ッボー'=>'bboo','ッブー'=>'bbuu', 1504 'ッパー'=>'ppaa','ッペー'=>'ppee','ッピー'=>'ppii','ッポー'=>'ppoo','ップー'=>'ppuu', 1505 'ッケー'=>'kkee','ッキー'=>'kkii','ッコー'=>'kkoo','ックー'=>'kkuu','ッカー'=>'kkaa', 1506 'ッガー'=>'ggaa','ッゲー'=>'ggee','ッギー'=>'ggii','ッゴー'=>'ggoo','ッグー'=>'gguu', 1507 'ッマー'=>'maa','ッメー'=>'mee','ッミー'=>'mii','ッモー'=>'moo','ッムー'=>'muu', 1508 'ッナー'=>'nnaa','ッネー'=>'nnee','ッニー'=>'nnii','ッノー'=>'nnoo','ッヌー'=>'nnuu', 1509 'ッラー'=>'rraa','ッレー'=>'rree','ッリー'=>'rrii','ッロー'=>'rroo','ッルー'=>'rruu', 1510 'ッサー'=>'ssaa','ッセー'=>'ssee','ッシー'=>'sshii','ッソー'=>'ssoo','ッスー'=>'ssuu', 1511 'ッザー'=>'zzaa','ッゼー'=>'zzee','ッジー'=>'jjii','ッゾー'=>'zzoo','ッズー'=>'zzuu', 1512 'ッター'=>'ttaa','ッテー'=>'ttee','ッチー'=>'chii','ットー'=>'ttoo','ッツー'=>'ttsuu', 1513 'ッダー'=>'ddaa','ッデー'=>'ddee','ッヂー'=>'ddii','ッドー'=>'ddoo','ッヅー'=>'dduu', 1514 1515 // 2 character syllables - normal 1516 'ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フゥ'=>'fu', 1517 // 'フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo','フュ'=>'fyu', 1518 'フャ'=>'fa','フェ'=>'fe','フィ'=>'fi','フョ'=>'fo','フュ'=>'fu', 1519 'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu', 1520 'ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo','ビュ'=>'byu', 1521 'ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1522 'キャ'=>'kya','キェ'=>'kye','キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu', 1523 'ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1524 'ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo','ミュ'=>'myu', 1525 'ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo','ニュ'=>'nyu', 1526 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu', 1527 'シャ'=>'sha','シェ'=>'she','ショ'=>'sho','シュ'=>'shu', 1528 'ジャ'=>'ja','ジェ'=>'je','ジョ'=>'jo','ジュ'=>'ju', 1529 'スァ'=>'swa','スェ'=>'swe','スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu', 1530 'デァ'=>'da','デェ'=>'de','ディ'=>'di','デォ'=>'do','デゥ'=>'du', 1531 'チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu', 1532 // 'ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi','ヂョ'=>'dyo','ヂュ'=>'dyu', 1533 'ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1534 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu', 1535 'ドァ'=>'dwa','ドェ'=>'dwe','ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu', 1536 'ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi','ウォ'=>'who','ウゥ'=>'whu', 1537 'ヴャ'=>'vya','ヴェ'=>'vye','ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu', 1538 'ヴァ'=>'va','ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu', 1539 'ウェ'=>'we','ウィ'=>'wi', 1540 'イェ'=>'ye', 1541 'ティ'=>'ti', 1542 'ヂィ'=>'di', 1543 1544 // 2 character syllables - doubled vocal 1545 'アー'=>'aa','エー'=>'ee','イー'=>'ii','オー'=>'oo','ウー'=>'uu', 1546 'ダー'=>'daa','デー'=>'dee','ヂー'=>'dii','ドー'=>'doo','ヅー'=>'duu', 1547 'ハー'=>'haa','ヘー'=>'hee','ヒー'=>'hii','ホー'=>'hoo','フー'=>'fuu', 1548 'バー'=>'baa','ベー'=>'bee','ビー'=>'bii','ボー'=>'boo','ブー'=>'buu', 1549 'パー'=>'paa','ペー'=>'pee','ピー'=>'pii','ポー'=>'poo','プー'=>'puu', 1550 'ケー'=>'kee','キー'=>'kii','コー'=>'koo','クー'=>'kuu','カー'=>'kaa', 1551 'ガー'=>'gaa','ゲー'=>'gee','ギー'=>'gii','ゴー'=>'goo','グー'=>'guu', 1552 'マー'=>'maa','メー'=>'mee','ミー'=>'mii','モー'=>'moo','ムー'=>'muu', 1553 'ナー'=>'naa','ネー'=>'nee','ニー'=>'nii','ノー'=>'noo','ヌー'=>'nuu', 1554 'ラー'=>'raa','レー'=>'ree','リー'=>'rii','ロー'=>'roo','ルー'=>'ruu', 1555 'サー'=>'saa','セー'=>'see','シー'=>'shii','ソー'=>'soo','スー'=>'suu', 1556 'ザー'=>'zaa','ゼー'=>'zee','ジー'=>'jii','ゾー'=>'zoo','ズー'=>'zuu', 1557 'ター'=>'taa','テー'=>'tee','チー'=>'chii','トー'=>'too','ツー'=>'tsuu', 1558 'ワー'=>'waa','ヲー'=>'woo', 1559 'ヤー'=>'yaa','ヨー'=>'yoo','ユー'=>'yuu', 1560 'ヵー'=>'kaa','ヶー'=>'kee', 1561 // old characters 1562 'ヱー'=>'wee','ヰー'=>'wii', 1563 1564 // seperate katakana 'n' 1565 'ンア'=>'n_a','ンエ'=>'n_e','ンイ'=>'n_i','ンオ'=>'n_o','ンウ'=>'n_u', 1566 'ンヤ'=>'n_ya','ンヨ'=>'n_yo','ンユ'=>'n_yu', 1567 1568 // 2 character syllables - doubled consonants 1569 'ッバ'=>'bba','ッベ'=>'bbe','ッビ'=>'bbi','ッボ'=>'bbo','ッブ'=>'bbu', 1570 'ッパ'=>'ppa','ッペ'=>'ppe','ッピ'=>'ppi','ッポ'=>'ppo','ップ'=>'ppu', 1571 'ッケ'=>'kke','ッキ'=>'kki','ッコ'=>'kko','ック'=>'kku','ッカ'=>'kka', 1572 'ッガ'=>'gga','ッゲ'=>'gge','ッギ'=>'ggi','ッゴ'=>'ggo','ッグ'=>'ggu', 1573 'ッマ'=>'ma','ッメ'=>'me','ッミ'=>'mi','ッモ'=>'mo','ッム'=>'mu', 1574 'ッナ'=>'nna','ッネ'=>'nne','ッニ'=>'nni','ッノ'=>'nno','ッヌ'=>'nnu', 1575 'ッラ'=>'rra','ッレ'=>'rre','ッリ'=>'rri','ッロ'=>'rro','ッル'=>'rru', 1576 'ッサ'=>'ssa','ッセ'=>'sse','ッシ'=>'sshi','ッソ'=>'sso','ッス'=>'ssu', 1577 'ッザ'=>'zza','ッゼ'=>'zze','ッジ'=>'jji','ッゾ'=>'zzo','ッズ'=>'zzu', 1578 'ッタ'=>'tta','ッテ'=>'tte','ッチ'=>'cchi','ット'=>'tto','ッツ'=>'ttsu', 1579 'ッダ'=>'dda','ッデ'=>'dde','ッヂ'=>'ddi','ッド'=>'ddo','ッヅ'=>'ddu', 1580 1581 // 1 character syllables 1582 'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ン'=>'n', 1583 'ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho','フ'=>'fu', 1584 'バ'=>'ba','ベ'=>'be','ビ'=>'bi','ボ'=>'bo','ブ'=>'bu', 1585 'パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu', 1586 'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','カ'=>'ka', 1587 'ガ'=>'ga','ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu', 1588 'マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu', 1589 'ナ'=>'na','ネ'=>'ne','ニ'=>'ni','ノ'=>'no','ヌ'=>'nu', 1590 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru', 1591 'サ'=>'sa','セ'=>'se','シ'=>'shi','ソ'=>'so','ス'=>'su', 1592 'ザ'=>'za','ゼ'=>'ze','ジ'=>'ji','ゾ'=>'zo','ズ'=>'zu', 1593 'タ'=>'ta','テ'=>'te','チ'=>'chi','ト'=>'to','ツ'=>'tsu', 1594 'ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do','ヅ'=>'du', 1595 'ワ'=>'wa','ヲ'=>'wo', 1596 'ヤ'=>'ya','ヨ'=>'yo','ユ'=>'yu', 1597 'ヵ'=>'ka','ヶ'=>'ke', 1598 // old characters 1599 'ヱ'=>'we','ヰ'=>'wi', 1600 1601 // convert what's left (probably only kicks in when something's missing above) 1602 'ァ'=>'a','ェ'=>'e','ィ'=>'i','ォ'=>'o','ゥ'=>'u', 1603 'ャ'=>'ya','ョ'=>'yo','ュ'=>'yu', 1604 1605 // special characters 1606 '・'=>'_','、'=>'_', 1607 'ー'=>'_', // when used with hiragana (seldom), this character would not be converted otherwise 1608 1609 // 'ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo','ル'=>'lu', 1610 // 'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu', 1611 //'デャ'=>'dha','デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu', 1612 // 'リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi','リョ'=>'lyo','リュ'=>'lyu', 1613 // 'テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho','テュ'=>'thu', 1614 //'ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi','フォ'=>'fwo','フゥ'=>'fwu', 1615 //'チャ'=>'tya','チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu', 1616 // 'ジャ'=>'jya','ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu', 1617 // 'ジャ'=>'zha','ジェ'=>'zhe','ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu', 1618 //'ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi','ジョ'=>'zyo','ジュ'=>'zyu', 1619 //'シャ'=>'sya','シェ'=>'sye','シィ'=>'syi','ショ'=>'syo','シュ'=>'syu', 1620 //'シ'=>'ci','フ'=>'hu',シ'=>'si','チ'=>'ti','ツ'=>'tu','イ'=>'yi','ヂ'=>'dzi', 1621 1622 // "Greeklish" 1623 'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1624 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1625 1626 // Thai 1627 'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1628 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th', 1629 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1630 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1631 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1632 'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1633 'ะ'=>'a','ั'=>'a','รร'=>'a','า'=>'a','ๅ'=>'a','ำ'=>'am','ํา'=>'am', 1634 'ิ'=>'i','ี'=>'i','ึ'=>'ue','ี'=>'ue','ุ'=>'u','ู'=>'u', 1635 'เ'=>'e','แ'=>'ae','โ'=>'o','อ'=>'o', 1636 'ียะ'=>'ia','ีย'=>'ia','ือะ'=>'uea','ือ'=>'uea','ัวะ'=>'ua','ัว'=>'ua', 1637 'ใ'=>'ai','ไ'=>'ai','ัย'=>'ai','าย'=>'ai','าว'=>'ao', 1638 'ุย'=>'ui','อย'=>'oi','ือย'=>'ueai','วย'=>'uai', 1639 'ิว'=>'io','็ว'=>'eo','ียว'=>'iao', 1640 '่'=>'','้'=>'','๊'=>'','๋'=>'','็'=>'', 1641 '์'=>'','๎'=>'','ํ'=>'','ฺ'=>'', 1642 'ๆ'=>'2','๏'=>'o','ฯ'=>'-','๚'=>'-','๛'=>'-', 1643 '๐'=>'0','๑'=>'1','๒'=>'2','๓'=>'3','๔'=>'4', 1644 '๕'=>'5','๖'=>'6','๗'=>'7','๘'=>'8','๙'=>'9', 1645 1646 // Korean 1647 'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p', 1648 'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss', 1649 'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o', 1650 'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we', 1651 'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy', 1652 'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey', 1653); 1654 1655//Setup VIM: ex: et ts=2 enc=utf-8 : 1656 1657