<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * URL-Encode a filename to allow unicode characters
 *
 * Slashes are not encoded
 *
 * When the second parameter is true the string is returned untouched
 * if it only consists of the characters a-z, A-Z, 0-9, '/', '_', '-',
 * '.' and '%' - this makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file the filename to encode
 * @param  bool   $safe skip encoding when only safe characters are present
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
  if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){
    return $file;
  }
  $file = urlencode($file);
  $file = str_replace('%2F','/',$file);
  return $file;
}

/**
 * URL-Decode a filename
 *
 * This is just a wrapper around urldecode
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file the encoded filename
 * @return string
 */
function utf8_decodeFN($file){
  $file = urldecode($file);
  return $file;
}

/**
 * Checks if a string contains 7bit ASCII only
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return bool   true if no byte is larger than 127
 */
function utf8_isASCII($str){
  // no 'u' modifier: the string is inspected byte by byte
  return !preg_match('/[\x80-\xFF]/',$str);
}

/**
 * Strips all highbyte chars
 *
 * Returns a pure ASCII7 string
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return string $str without any byte larger than 127
 */
function utf8_strip($str){
  // no 'u' modifier: the string is processed byte by byte
  return preg_replace('/[\x80-\xFF]/','',$str);
}

/**
 * Tries to detect if a string is in Unicode encoding
 *
 * Only the lead byte / continuation byte structure is checked. Lead
 * bytes announcing up to five continuation bytes are accepted.
 *
 * @author <bmorel@ssi.fr>
 * @link   http://www.php.net/manual/en/function.utf8-encode.php
 * @param  string $Str
 * @return bool
 */
function utf8_check($Str) {
  $len = strlen($Str);
  for ($i=0; $i<$len; $i++) {
    $b = ord($Str[$i]);
    if ($b < 0x80) continue; # 0bbbbbbb
    elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
    elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
    elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
    elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
    elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
    else return false; # Does not match any model
    for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
      if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
        return false;
    }
  }
  return true;
}

/**
 * Unicode aware replacement for strlen()
 *
 * Counts every byte that is not a UTF-8 continuation byte (10bbbbbb).
 * For valid UTF-8 this is the number of characters. utf8_decode() is
 * no longer used because it is deprecated in recent PHP versions.
 *
 * @author <chernyshevsky at hotmail dot com>
 * @see    strlen()
 * @param  string $string
 * @return int
 */
function utf8_strlen($string){
  return strlen(preg_replace('/[\x80-\xBF]/','',$string));
}

/**
 * Unicode aware replacement for substr()
 *
 * @author lmak at NOSPAM dot iti dot gr
 * @link   http://www.php.net/manual/en/function.substr.php
 * @see    substr()
 * @param  string   $str
 * @param  int      $start  character offset
 * @param  int|null $length number of characters, null for "up to the end"
 * @return string
 */
function utf8_substr($str,$start,$length=null){
  // the 's' modifier makes the dot match newlines as well, otherwise
  // they would silently be dropped from the result
  preg_match_all('/./su', $str, $ar);

  // strict null check: a length of 0 has to yield an empty string
  if(is_null($length)){
    return join('',array_slice($ar[0],$start));
  }
  return join('',array_slice($ar[0],$start,$length));
}

/**
 * Unicode aware replacement for substr_replace()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    substr_replace()
 * @param  string $string
 * @param  string $replacement
 * @param  int    $start  character offset where the replacement is put
 * @param  int    $length number of characters to replace (0 inserts only)
 * @return string
 */
function utf8_substr_replace($string, $replacement, $start , $length=0 ){
  $ret = '';
  if($start>0) $ret .= utf8_substr($string, 0, $start);
  $ret .= $replacement;
  $ret .= utf8_substr($string, $start+$length);
  return $ret;
}

/**
 * Unicode aware replacement for explode
 *
 * @TODO   support third limit arg
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    explode();
 * @param  string $sep delimiter, must not be empty
 * @param  string $str
 * @return array|false false (and a warning) on an empty delimiter
 */
function utf8_explode($sep, $str) {
  if ( $sep == '' ) {
    trigger_error('Empty delimiter',E_USER_WARNING);
    return FALSE;
  }

  return preg_split('!'.preg_quote($sep,'!').'!u',$str);
}

/**
 * Unicode aware replacement for str_replace()
 *
 * @todo   support PHP5 count (fourth arg)
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    str_replace();
 * @param  string|array $s   search string(s)
 * @param  string|array $r   replacement(s), inserted literally
 * @param  string       $str subject
 * @return string
 */
function utf8_str_replace($s,$r,$str){
  if(!is_array($s)){
    $s = '!'.preg_quote($s,'!').'!u';
  }else{
    foreach ($s as $k => $v) {
      $s[$k] = '!'.preg_quote($v,'!').'!u';
    }
  }

  // escape backslash and dollar, otherwise preg_replace() would treat
  // things like $1 or \1 in the replacement as back references
  if(is_array($r)){
    foreach ($r as $k => $v) {
      $r[$k] = addcslashes($v,'\\$');
    }
  }else{
    $r = addcslashes($r,'\\$');
  }

  return preg_replace($s,$r,$str);
}

/**
 * Unicode aware replacement for ltrim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    ltrim()
 * @param  string $str
 * @param  string $charlist characters to strip, ltrim() defaults if empty
 * @return string
 */
function utf8_ltrim($str,$charlist=''){
  if($charlist == '') return ltrim($str);

  //quote charlist for use in a characterclass ('^' would negate the class)
  $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist);

  return preg_replace('/^['.$charlist.']+/u','',$str);
}

/**
 * Unicode aware replacement for rtrim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    rtrim()
 * @param  string $str
 * @param  string $charlist characters to strip, rtrim() defaults if empty
 * @return string
 */
function utf8_rtrim($str,$charlist=''){
  if($charlist == '') return rtrim($str);

  //quote charlist for use in a characterclass ('^' would negate the class)
  $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist);

  return preg_replace('/['.$charlist.']+$/u','',$str);
}

/**
 * Unicode aware replacement for trim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    trim()
 * @param  string $str
 * @param  string $charlist characters to strip, trim() defaults if empty
 * @return string
 */
function utf8_trim($str,$charlist='') {
  if($charlist == '') return trim($str);

  // the charlist has to be handed on to both helpers
  return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
}


/**
 * This is a unicode aware replacement for strtolower()
 *
 * Uses mb_string extension if available, otherwise the
 * $UTF8_UPPER_TO_LOWER lookup table
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtolower()
 * @see    utf8_strtoupper()
 * @param  string $string
 * @return string
 */
function utf8_strtolower($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtolower'))
    return mb_strtolower($string,'utf-8');

  global $UTF8_UPPER_TO_LOWER;
  $uni = utf8_to_unicode($string);
  $cnt = count($uni);
  for ($i=0; $i < $cnt; $i++){
    // isset() avoids a notice for every code point without a mapping
    if(isset($UTF8_UPPER_TO_LOWER[$uni[$i]])){
      $uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]];
    }
  }
  return unicode_to_utf8($uni);
}

/**
 * This is a unicode aware replacement for strtoupper()
 *
 * Uses mb_string extension if available, otherwise the
 * $UTF8_LOWER_TO_UPPER lookup table
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtoupper()
 * @see    utf8_strtolower()
 * @param  string $string
 * @return string
 */
function utf8_strtoupper($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtoupper'))
    return mb_strtoupper($string,'utf-8');

  global $UTF8_LOWER_TO_UPPER;
  $uni = utf8_to_unicode($string);
  $cnt = count($uni);
  for ($i=0; $i < $cnt; $i++){
    // isset() avoids a notice for every code point without a mapping
    if(isset($UTF8_LOWER_TO_UPPER[$uni[$i]])){
      $uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]];
    }
  }
  return unicode_to_utf8($uni);
}

/**
 * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
 *
 * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
 * letters. Default is to deaccent both cases ($case = 0)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string
 * @param  int    $case -1 lower case only, 1 upper case only, 0 both
 * @return string
 */
function utf8_deaccent($string,$case=0){
  if($case <= 0){
    global $UTF8_LOWER_ACCENTS;
    $string = str_replace(array_keys($UTF8_LOWER_ACCENTS),array_values($UTF8_LOWER_ACCENTS),$string);
  }
  if($case >= 0){
    global $UTF8_UPPER_ACCENTS;
    $string = str_replace(array_keys($UTF8_UPPER_ACCENTS),array_values($UTF8_UPPER_ACCENTS),$string);
  }
  return $string;
}

/**
 * Romanize a non-latin string
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string
 * @return string $string with the replacements of $UTF8_ROMANIZATION applied
 */
function utf8_romanize($string){
  if(utf8_isASCII($string)) return $string; //nothing to do

  global $UTF8_ROMANIZATION;
  return strtr($string,$UTF8_ROMANIZATION);
}

/**
 * Removes special characters (nonalphanumeric) from a UTF-8 string
 *
 * This function adds the controlchars 0x00 to 0x19 to the array of
 * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string     The UTF8 string to strip of special chars
 * @param  string $repl       Replace special with this string
 * @param  string $additional Additional chars to strip (used in regexp char class)
 * @return string
 */
function utf8_stripspecials($string,$repl='',$additional=''){
  global $UTF8_SPECIAL_CHARS;

  // the quoted character class is built once per request only
  static $specials = null;
  if(is_null($specials)){
    $specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/');
  }

  return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
}

/**
 * This is an Unicode aware replacement for strpos
 *
 * Uses mb_string extension if available
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    strpos()
 * @param  string $haystack
 * @param  string $needle
 * @param  int    $offset character offset to start searching at
 * @return int|false character position of the first match or false
 */
function utf8_strpos($haystack, $needle,$offset=0) {
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos'))
    return mb_strpos($haystack,$needle,$offset,'utf-8');

  if(!$offset){
    // everything in front of the first separator is the position
    $ar = utf8_explode($needle, $haystack);
    if ( is_array($ar) && count($ar) > 1 ) {
      return utf8_strlen($ar[0]);
    }
    return false;
  }else{
    if ( !is_int($offset) ) {
      trigger_error('Offset must be an integer',E_USER_WARNING);
      return false;
    }

    // search in the remainder and shift the result by the offset
    $rest = utf8_substr($haystack, $offset);

    if ( false !== ($pos = utf8_strpos($rest,$needle))){
      return $pos + $offset;
    }
    return false;
  }
}

/**
 * Encodes UTF-8 characters to HTML entities
 *
 * Only two byte sequences (U+0080 to U+07FF) are converted to numeric
 * entities, all other bytes are copied as they are.
 *
 * @author <vpribish at shopping dot com>
 * @link   http://www.php.net/manual/en/function.utf8-decode.php
 * @param  string $str
 * @return string
 */
function utf8_tohtml ($str) {
  $ret  = '';
  $max  = strlen($str);
  $last = 0;  // index of the first byte not yet copied to $ret
  for ($i=0; $i<$max; $i++) {
    $c1 = ord($str[$i]);
    // 110x xxxx is the lead byte of a two byte sequence. A lead byte at
    // the very end has no second byte and is copied as it is.
    if ($c1>>5 == 6 && $i+1 < $max) {
      $ret .= substr($str, $last, $i-$last); // append all the regular characters we've passed
      $c2   = ord($str[++$i]);               // the next byte
      // 5 payload bits of the lead byte followed by 6 of the second byte
      $ret .= '&#' . ((($c1 & 31) << 6) | ($c2 & 63)) . ';';
      $last = $i+1;
    }
  }
  return $ret . substr($str, $last); // append the last batch of regular characters
}

/**
 * This function returns any UTF-8 encoded text as a list of
 * Unicode values:
 *
 * Sequences of up to four bytes are decoded. The input is not validated.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    unicode_to_utf8()
 * @param  string $str
 * @return array  list of integer code points
 */
function utf8_to_unicode( $str ) {
  $unicode = array();
  $values = array();
  $lookingFor = 1;
  $len = strlen( $str );

  for ($i = 0; $i < $len; $i++ ) {
    $thisValue = ord( $str[ $i ] );
    if ( $thisValue < 128 ) {
      $unicode[] = $thisValue;
      continue;
    }

    // the first byte of a sequence tells how many bytes belong to it
    if ( count( $values ) == 0 ) {
      if ( $thisValue < 224 )     $lookingFor = 2;
      elseif ( $thisValue < 240 ) $lookingFor = 3;
      else                        $lookingFor = 4;
    }
    $values[] = $thisValue;

    if ( count( $values ) == $lookingFor ) {
      if ( $lookingFor == 4 ) {
        $number = ( ( $values[0] % 8 ) * 262144 ) + ( ( $values[1] % 64 ) * 4096 ) +
                  ( ( $values[2] % 64 ) * 64 ) + ( $values[3] % 64 );
      } elseif ( $lookingFor == 3 ) {
        $number = ( ( $values[0] % 16 ) * 4096 ) + ( ( $values[1] % 64 ) * 64 ) + ( $values[2] % 64 );
      } else {
        $number = ( ( $values[0] % 32 ) * 64 ) + ( $values[1] % 64 );
      }
      $unicode[] = $number;
      $values = array();
      $lookingFor = 1;
    }
  }
  return $unicode;
}

/**
 * This function converts a Unicode array back to its UTF-8 representation
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    utf8_to_unicode()
 * @param  array  $str list of integer code points
 * @return string empty string if $str is not an array
 */
function unicode_to_utf8( $str ) {
  if (!is_array($str)) return '';

  $utf8 = '';
  foreach( $str as $unicode ) {
    if ( $unicode < 128 ) {
      $utf8.= chr( $unicode );
    } elseif ( $unicode < 2048 ) {
      $utf8.= chr( 192 + ( $unicode >> 6 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    } elseif ( $unicode < 65536 ) {
      $utf8.= chr( 224 + ( $unicode >> 12 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    } else {
      $utf8.= chr( 240 + ( $unicode >> 18 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 12 ) & 63 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    }
  }
  return $utf8;
}

/**
 * UTF-8 to UTF-16BE conversion.
 *
 * Uses mb_string extension if available. The fallback writes code
 * points above 0xFFFF as surrogate pairs.
 *
 * @param  string $str UTF-8 encoded string
 * @param  bool   $bom prepend the big endian byte order mark
 * @return string
 */
function utf8_to_utf16be($str, $bom = false) {
  $out = $bom ? "\xFE\xFF" : '';
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_convert_encoding'))
    return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

  $uni = utf8_to_unicode($str);
  foreach($uni as $cp){
    if($cp > 0xFFFF){
      // does not fit into 16 bit: high surrogate followed by low surrogate
      $cp  -= 0x10000;
      $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
    }else{
      $out .= pack('n',$cp);
    }
  }
  return $out;
}

/**
 * UTF-16BE to UTF-8 conversion.
 *
 * Surrogate pairs are combined into one code point, unpaired
 * surrogates are passed on to unicode_to_utf8() unchanged.
 *
 * @param  string $str UTF-16BE encoded string (without byte order mark handling)
 * @return string
 */
function utf16be_to_utf8($str) {
  $units = unpack('n*',$str);
  if(!is_array($units)) return '';

  $uni  = array();
  $high = null;  // pending high surrogate
  foreach($units as $unit){
    if(!is_null($high)){
      if($unit >= 0xDC00 && $unit <= 0xDFFF){
        $uni[] = 0x10000 + (($high - 0xD800) << 10) + ($unit - 0xDC00);
        $high  = null;
        continue;
      }
      $uni[] = $high;  // not followed by a low surrogate
      $high  = null;
    }
    if($unit >= 0xD800 && $unit <= 0xDBFF){
      $high = $unit;
      continue;
    }
    $uni[] = $unit;
  }
  if(!is_null($high)) $uni[] = $high;

  return unicode_to_utf8($uni);
}

/**
 * UTF-8 Case lookup table
 *
 * This lookuptable maps the Unicode values of lower case letters to the
 * Unicode values of their corresponding upper case letters
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
static $UTF8_LOWER_TO_UPPER = array(
  0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
  0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
  0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
  0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
  0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
  0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
  0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
  0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
  0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
  0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
  0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
  0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
  0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
  0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
  0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
  0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
  0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
  0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
  0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
  0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
  0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
  0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
  0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
  0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
  0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
  0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
  0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
  0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
  0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
  0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
  0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
  0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
  0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
  0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
  0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
  0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
  0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
  0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
  0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
  0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
  0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
  0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
  0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
);

/**
 * UTF-8 Case lookup table
 *
 * This lookuptable maps the Unicode values of upper case letters to the
 * Unicode values of their corresponding lower case letters (it does so
 * by flipping $UTF8_LOWER_TO_UPPER)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
$UTF8_UPPER_TO_LOWER = @array_flip($UTF8_LOWER_TO_UPPER);

/**
 * UTF-8 lookup table for lower case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. These are lower case letters only.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
$UTF8_LOWER_ACCENTS = array(
  'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
  'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
  'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
  'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o',
  'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
  'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
  'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
  'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
  'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
  'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
  'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
  'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
  'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
  'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
);

/**
 * UTF-8 lookup table for upper case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. These are upper case letters only.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
$UTF8_UPPER_ACCENTS = array(
  'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
  'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
  'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
  'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O',
  'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
  'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
  'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
  'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
  'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
  'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
  'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
  'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
  'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
  'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
);

/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets - it's not a complete list of non-alphanum
 * characters in UTF-8. It's not perfect but should match most cases of special
 * chars.
 *
 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_stripspecials()
 */
$UTF8_SPECIAL_CHARS = array(
  0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
  0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c,
  0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
  0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
  0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
  0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
  0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
  0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
  0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
  0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
  0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
  0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
  0x0385, 0x0387, 0x03b2, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
  0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
  0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
  0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
  0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
  0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
  0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
  0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
  0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
  0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
  0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
  0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
  0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
  0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
  0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
  0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
  0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
  0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
  0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
  0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
  0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
  0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
  0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
  0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
  0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
  0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
  0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
  0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
  0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
  0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
  0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
  0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
  0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
  0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
  0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
  0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
  0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
  0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
);

/**
 * Romanization lookup table
 *
 * This lookup tables provides a way to transform strings written in a language
 * different from the ones based upon latin letters into plain ASCII.
 *
 * Please note: this is not a scientific transliteration table. It only works
 * oneway from nonlatin to ASCII and it works by simple character replacement
 * only. Specialities of each language are not supported.
 *
 * Some keys are listed more than once - PHP keeps the last value given
 * for such a key.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Vitaly Blokhin <vitinfo@vitn.com>
 * @link   http://www.uconv.com/translit.htm
 * @author Bisqwit <bisqwit@iki.fi>
 * @link   http://kanjidict.stc.cx/hiragana.php?src=2
 * @link   http://www.translatum.gr/converter/greek-transliteration.htm
 * @link   http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription
 * @link   http://www.btranslations.com/resources/romanization/korean.asp
 */
$UTF8_ROMANIZATION = array(
  //russian cyrillic
  'а'=>'a','А'=>'A','б'=>'b','Б'=>'B','в'=>'v','В'=>'V','г'=>'g','Г'=>'G',
  'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','ё'=>'jo','Ё'=>'Jo','ж'=>'zh','Ж'=>'Zh',
  'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K',
  'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Н'=>'N','о'=>'o','О'=>'O',
  'п'=>'p','П'=>'P','р'=>'r','Р'=>'R','с'=>'s','С'=>'S','т'=>'t','Т'=>'T',
  'у'=>'u','У'=>'U','ф'=>'f','Ф'=>'F','х'=>'x','Х'=>'X','ц'=>'c','Ц'=>'C',
  'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'th','Щ'=>'Th','ъ'=>'qh',
  'Ъ'=>'Qh','ы'=>'y','Ы'=>'Y','ь'=>'q','Ь'=>'Q','э'=>'eh','Э'=>'Eh','ю'=>'ju',
  'Ю'=>'Ju','я'=>'ja','Я'=>'Ja',
  // Ukrainian cyrillic
  'Ґ'=>'Gh','ґ'=>'gh','Є'=>'Je','є'=>'je','І'=>'I','і'=>'i','Ї'=>'Ji','ї'=>'ji',
  // Georgian
  'ა'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th',
  'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','ო'=>'o','პ'=>'p','ჟ'=>'zh',
  'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q',
  'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','ჭ'=>'j','ხ'=>'x','ჯ'=>'jh',
  'ჰ'=>'xh',
  //Sanskrit
  'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry',
  'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','ए'=>'e','ऐ'=>'ay','ओ'=>'o','औ'=>'aw',
  'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh',
  'च'=>'c','छ'=>'ch','ज'=>'j','झ'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh',
  'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh',
  'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','भ'=>'bh','म'=>'m','य'=>'z','र'=>'r',
  'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x',
  //Hebrew
  'ב'=>'a','ג'=>'b','ד'=>'g','ה'=>'d','ו'=>'x','ז'=>'v','ח'=>'kh','ט'=>'th',
  'י'=>'y','ך'=>'k','כ'=>'k','ל'=>'l','ם'=>'m','מ'=>'m','ן'=>'n','נ'=>'n',
  'ס'=>'s','ע'=>'ah','ף'=>'p','פ'=>'p','ץ'=>'c','צ'=>'c','ק'=>'q','ר'=>'r',
  'ש'=>'sh','ת'=>'t',
  //Arabic
  'ا'=>'a','ب'=>'b','ت'=>'t','ث'=>'th','ج'=>'g','ح'=>'xh','خ'=>'x','د'=>'d',
  'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','ش'=>'sh','ص'=>'s\'','ض'=>'d\'',
  'ط'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','ف'=>'f','ق'=>'q','ك'=>'k',
  'ل'=>'l','م'=>'m','ن'=>'n','ه'=>'x\'','و'=>'u','ي'=>'i',

  // Japanese hiragana
  'あ'=>'a','え'=>'e','い'=>'i','お'=>'o','う'=>'u','ば'=>'ba','べ'=>'be',
  'び'=>'bi','ぼ'=>'bo','ぶ'=>'bu','し'=>'ci','だ'=>'da','で'=>'de','ぢ'=>'di',
  'ど'=>'do','づ'=>'du','ふぁ'=>'fa','ふぇ'=>'fe','ふぃ'=>'fi','ふぉ'=>'fo',
  'ふ'=>'fu','が'=>'ga','げ'=>'ge','ぎ'=>'gi','ご'=>'go','ぐ'=>'gu','は'=>'ha',
  'へ'=>'he','ひ'=>'hi','ほ'=>'ho','ふ'=>'hu','じゃ'=>'ja','じぇ'=>'je',
  'じ'=>'ji','じょ'=>'jo','じゅ'=>'ju','か'=>'ka','け'=>'ke','き'=>'ki',
  'こ'=>'ko','く'=>'ku','ら'=>'la','れ'=>'le','り'=>'li','ろ'=>'lo','る'=>'lu',
  'ま'=>'ma','め'=>'me','み'=>'mi','も'=>'mo','む'=>'mu','な'=>'na','ね'=>'ne',
  'に'=>'ni','の'=>'no','ぬ'=>'nu','ぱ'=>'pa','ぺ'=>'pe','ぴ'=>'pi','ぽ'=>'po',
  'ぷ'=>'pu','ら'=>'ra','れ'=>'re','り'=>'ri','ろ'=>'ro','る'=>'ru','さ'=>'sa',
  'せ'=>'se','し'=>'si','そ'=>'so','す'=>'su','た'=>'ta','て'=>'te','ち'=>'ti',
  'と'=>'to','つ'=>'tu','ヴぁ'=>'va','ヴぇ'=>'ve','ヴぃ'=>'vi','ヴぉ'=>'vo',
  'ヴ'=>'vu','わ'=>'wa','うぇ'=>'we','うぃ'=>'wi','を'=>'wo','や'=>'ya','いぇ'=>'ye',
  'い'=>'yi','よ'=>'yo','ゆ'=>'yu','ざ'=>'za','ぜ'=>'ze','じ'=>'zi','ぞ'=>'zo',
  'ず'=>'zu','びゃ'=>'bya','びぇ'=>'bye','びぃ'=>'byi','びょ'=>'byo','びゅ'=>'byu',
  'ちゃ'=>'cha','ちぇ'=>'che','ち'=>'chi','ちょ'=>'cho','ちゅ'=>'chu','ちゃ'=>'cya',
  'ちぇ'=>'cye','ちぃ'=>'cyi','ちょ'=>'cyo','ちゅ'=>'cyu','でゃ'=>'dha','でぇ'=>'dhe',
  'でぃ'=>'dhi','でょ'=>'dho','でゅ'=>'dhu','どぁ'=>'dwa','どぇ'=>'dwe','どぃ'=>'dwi',
  'どぉ'=>'dwo','どぅ'=>'dwu','ぢゃ'=>'dya','ぢぇ'=>'dye','ぢぃ'=>'dyi','ぢょ'=>'dyo',
  'ぢゅ'=>'dyu','ぢ'=>'dzi','ふぁ'=>'fwa','ふぇ'=>'fwe','ふぃ'=>'fwi','ふぉ'=>'fwo',
  'ふぅ'=>'fwu','ふゃ'=>'fya','ふぇ'=>'fye','ふぃ'=>'fyi','ふょ'=>'fyo','ふゅ'=>'fyu',
  'ぎゃ'=>'gya','ぎぇ'=>'gye','ぎぃ'=>'gyi','ぎょ'=>'gyo','ぎゅ'=>'gyu','ひゃ'=>'hya',
  'ひぇ'=>'hye','ひぃ'=>'hyi','ひょ'=>'hyo','ひゅ'=>'hyu','じゃ'=>'jya','じぇ'=>'jye',
  'じぃ'=>'jyi','じょ'=>'jyo','じゅ'=>'jyu','きゃ'=>'kya','きぇ'=>'kye','きぃ'=>'kyi',
  'きょ'=>'kyo','きゅ'=>'kyu','りゃ'=>'lya','りぇ'=>'lye','りぃ'=>'lyi','りょ'=>'lyo',
  'りゅ'=>'lyu','みゃ'=>'mya','みぇ'=>'mye','みぃ'=>'myi','みょ'=>'myo','みゅ'=>'myu',
  'ん'=>'n','にゃ'=>'nya','にぇ'=>'nye','にぃ'=>'nyi','にょ'=>'nyo','にゅ'=>'nyu',
  'ぴゃ'=>'pya','ぴぇ'=>'pye','ぴぃ'=>'pyi','ぴょ'=>'pyo','ぴゅ'=>'pyu','りゃ'=>'rya',
  'りぇ'=>'rye','りぃ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu','しゃ'=>'sha','しぇ'=>'she',
  'し'=>'shi','しょ'=>'sho','しゅ'=>'shu','すぁ'=>'swa','すぇ'=>'swe','すぃ'=>'swi',
  'すぉ'=>'swo','すぅ'=>'swu','しゃ'=>'sya','しぇ'=>'sye','しぃ'=>'syi','しょ'=>'syo',
  'しゅ'=>'syu','てゃ'=>'tha','てぇ'=>'the','てぃ'=>'thi','てょ'=>'tho','てゅ'=>'thu',
  'つゃ'=>'tsa','つぇ'=>'tse','つぃ'=>'tsi','つょ'=>'tso','つ'=>'tsu','とぁ'=>'twa',
  'とぇ'=>'twe','とぃ'=>'twi','とぉ'=>'two','とぅ'=>'twu','ちゃ'=>'tya','ちぇ'=>'tye',
  'ちぃ'=>'tyi','ちょ'=>'tyo','ちゅ'=>'tyu','ヴゃ'=>'vya','ヴぇ'=>'vye','ヴぃ'=>'vyi',
  'ヴょ'=>'vyo','ヴゅ'=>'vyu','うぁ'=>'wha','うぇ'=>'whe','うぃ'=>'whi','うぉ'=>'who',
  'うぅ'=>'whu','ゑ'=>'wye','ゐ'=>'wyi','じゃ'=>'zha','じぇ'=>'zhe','じぃ'=>'zhi',
  'じょ'=>'zho','じゅ'=>'zhu','じゃ'=>'zya','じぇ'=>'zye','じぃ'=>'zyi','じょ'=>'zyo',
  'じゅ'=>'zyu',
  // Japanese katakana
  'ア'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','バ'=>'ba','ベ'=>'be','ビ'=>'bi',
  'ボ'=>'bo','ブ'=>'bu','シ'=>'ci','ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do',
  'ヅ'=>'du','ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フ'=>'fu','ガ'=>'ga',
  'ゲ'=>'ge','ギ'=>'gi','ゴ'=>'go','グ'=>'gu','ハ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho',
  'フ'=>'hu','ジャ'=>'ja','ジェ'=>'je','ジ'=>'ji','ジョ'=>'jo','ジュ'=>'ju','カ'=>'ka',
  'ケ'=>'ke','キ'=>'ki','コ'=>'ko','ク'=>'ku','ラ'=>'la','レ'=>'le','リ'=>'li','ロ'=>'lo',
  'ル'=>'lu','マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu','ナ'=>'na','ネ'=>'ne',
  'ニ'=>'ni','ノ'=>'no','ヌ'=>'nu','パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ポ'=>'po','プ'=>'pu',
  'ラ'=>'ra','レ'=>'re','リ'=>'ri','ロ'=>'ro','ル'=>'ru','サ'=>'sa','セ'=>'se','シ'=>'si',
  'ソ'=>'so','ス'=>'su','タ'=>'ta','テ'=>'te','チ'=>'ti','ト'=>'to','ツ'=>'tu','ヴァ'=>'va',
  'ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu','ワ'=>'wa','ウェ'=>'we','ウィ'=>'wi',
  'ヲ'=>'wo','ヤ'=>'ya','イェ'=>'ye','イ'=>'yi','ヨ'=>'yo','ユ'=>'yu','ザ'=>'za','ゼ'=>'ze',
  'ジ'=>'zi','ゾ'=>'zo','ズ'=>'zu','ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo',
  'ビュ'=>'byu','チャ'=>'cha','チェ'=>'che','チ'=>'chi','チョ'=>'cho','チュ'=>'chu',
  'チャ'=>'cya','チェ'=>'cye','チィ'=>'cyi','チョ'=>'cyo','チュ'=>'cyu','デャ'=>'dha',
  'デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu','ドァ'=>'dwa','ドェ'=>'dwe',
  'ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu','ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi',
  'ヂョ'=>'dyo','ヂュ'=>'dyu','ヂ'=>'dzi','ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi',
  'フォ'=>'fwo','フゥ'=>'fwu','フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo',
  'フュ'=>'fyu','ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu',
  'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu','ジャ'=>'jya',
  'ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu','キャ'=>'kya','キェ'=>'kye',
  'キィ'=>'kyi','キョ'=>'kyo','キュ'=>'kyu','リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi',
  'リョ'=>'lyo','リュ'=>'lyu','ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo',
  'ミュ'=>'myu','ン'=>'n','ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo',
  'ニュ'=>'nyu','ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu',
  'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu','シャ'=>'sha',
  'シェ'=>'she','シ'=>'shi','ショ'=>'sho','シュ'=>'shu','スァ'=>'swa','スェ'=>'swe',
  'スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu','シャ'=>'sya','シェ'=>'sye','シィ'=>'syi',
  'ショ'=>'syo','シュ'=>'syu','テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho',
  'テュ'=>'thu','ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu',
  'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu','チャ'=>'tya',
  'チェ'=>'tye','チィ'=>'tyi','チョ'=>'tyo','チュ'=>'tyu','ヴャ'=>'vya','ヴェ'=>'vye',
  'ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu','ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi',
  'ウォ'=>'who','ウゥ'=>'whu','ヱ'=>'wye','ヰ'=>'wyi','ジャ'=>'zha','ジェ'=>'zhe',
  'ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu','ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi',
  'ジョ'=>'zyo','ジュ'=>'zyu',

  // "Greeklish"
  'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Π'=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps',
  'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','π'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps',

  // Thai
  'ก'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch',
  'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','ญ'=>'y','ฎ'=>'d','ฏ'=>'t','ฐ'=>'th',
  'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th',
  'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','ฝ'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph',
  'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue',
  'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h',
  'ะ'=>'a','–ั'=>'a','รร'=>'a','า'=>'a','รร'=>'an','ำ'=>'am','–ิ'=>'i','–ี'=>'i',
  '–ึ'=>'ue','–ื'=>'ue','–ุ'=>'u','–ู'=>'u','เะ'=>'e','เ–็'=>'e','เ'=>'e','แะ'=>'ae',
  'แ'=>'ae','โะ'=>'o','โ'=>'o','เาะ'=>'o','อ'=>'o','เอะ'=>'oe','เ–ิ'=>'oe',
  'เอ'=>'oe','เ–ียะ'=>'ia','เ–ีย'=>'ia','เ–ือะ'=>'uea','เ–ือ'=>'uea','–ัวะ'=>'ua',
  '–ัว'=>'ua','ว'=>'ua','ใ'=>'ai','ไ'=>'ai','–ัย'=>'ai','ไย'=>'ai','าย'=>'ai',
  'เา'=>'ao','าว'=>'ao','–ุย'=>'ui','โย'=>'oi','อย'=>'oi','เย'=>'oei','เ–ือย'=>'ueai',
  'วย'=>'uai','–ิว'=>'io','เ–็ว'=>'eo','เว'=>'eo','แ–็ว'=>'aeo','แว'=>'aeo',
  'เ–ียว'=>'iao',

  // Korean
  'ㄱ'=>'k','ㅋ'=>'kh','ㄲ'=>'kk','ㄷ'=>'t','ㅌ'=>'th','ㄸ'=>'tt','ㅂ'=>'p',
  'ㅍ'=>'ph','ㅃ'=>'pp','ㅈ'=>'c','ㅊ'=>'ch','ㅉ'=>'cc','ㅅ'=>'s','ㅆ'=>'ss',
  'ㅎ'=>'h','ㅇ'=>'ng','ㄴ'=>'n','ㄹ'=>'l','ㅁ'=>'m', 'ㅏ'=>'a','ㅓ'=>'e','ㅗ'=>'o',
  'ㅜ'=>'wu','ㅡ'=>'u','ㅣ'=>'i','ㅐ'=>'ay','ㅔ'=>'ey','ㅚ'=>'oy','ㅘ'=>'wa','ㅝ'=>'we',
  'ㅟ'=>'wi','ㅙ'=>'way','ㅞ'=>'wey','ㅢ'=>'uy','ㅑ'=>'ya','ㅕ'=>'ye','ㅛ'=>'oy',
  'ㅠ'=>'yu','ㅒ'=>'yay','ㅖ'=>'yey',
);

//Setup VIM: ex: et ts=2 enc=utf-8 :