<?php
/**
 * UTF8 helper functions
 *
 * A collection of byte-safe string helpers for UTF-8 encoded text plus the
 * lookup tables they rely on. Where noted, the mbstring extension is used
 * when it is available; defining the constant UTF8_NOMBSTRING forces the
 * pure PHP fallbacks.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * URL-Encode a filename to allow unicode characters
 *
 * Slashes are not encoded
 *
 * When the second parameter is true the string will
 * be encoded only if it contains characters other than
 * letters, digits and "/ _ - . %" -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file The filename to encode
 * @param  bool   $safe Skip encoding when the name only has safe characters
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
  if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){
    return $file;
  }
  $file = urlencode($file);
  // path separators have to stay readable
  $file = str_replace('%2F','/',$file);
  return $file;
}

/**
 * URL-Decode a filename
 *
 * This is just a wrapper around urldecode
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file The encoded filename
 * @return string
 */
function utf8_decodeFN($file){
  return urldecode($file);
}

/**
 * Checks if a string contains 7bit ASCII only
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return bool true if no byte has its high bit set (also for '')
 */
function utf8_isASCII($str){
  // bytewise match (no /u modifier): any byte above 127 disqualifies
  return !preg_match('/[\x80-\xFF]/',$str);
}

/**
 * Strips all highbyte chars
 *
 * Returns a pure ASCII7 string
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return string
 */
function utf8_strip($str){
  // bytewise removal (no /u modifier) of everything above 127
  return preg_replace('/[\x80-\xFF]/','',$str);
}

/**
 * Tries to detect if a string is in Unicode encoding
 *
 * Only the structure is checked (lead byte followed by the right number
 * of continuation bytes); 5 and 6 byte forms are accepted as well.
 *
 * @author <bmorel@ssi.fr>
 * @link   http://www.php.net/manual/en/function.utf8-encode.php
 * @param  string $Str
 * @return bool
 */
function utf8_check($Str) {
  $len = strlen($Str);
  for ($i=0; $i<$len; $i++) {
    $byte = ord($Str[$i]);
    if ($byte < 0x80) continue;              # 0bbbbbbb
    elseif (($byte & 0xE0) == 0xC0) $n=1;    # 110bbbbb
    elseif (($byte & 0xF0) == 0xE0) $n=2;    # 1110bbbb
    elseif (($byte & 0xF8) == 0xF0) $n=3;    # 11110bbb
    elseif (($byte & 0xFC) == 0xF8) $n=4;    # 111110bb
    elseif (($byte & 0xFE) == 0xFC) $n=5;    # 1111110b
    else return false;                       # Does not match any model
    for ($j=0; $j<$n; $j++) {                # n bytes matching 10bbbbbb follow ?
      if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
        return false;
    }
  }
  return true;
}

/**
 * Unicode aware replacement for strlen()
 *
 * Every UTF-8 character consists of exactly one byte that is not a
 * continuation byte (10bbbbbb), so dropping the continuation bytes and
 * counting what is left yields the number of characters. This avoids
 * utf8_decode(), which is deprecated in current PHP versions.
 *
 * @see    strlen()
 * @param  string $string A UTF-8 encoded string
 * @return int    Number of characters
 */
function utf8_strlen($string){
  return strlen(preg_replace('/[\x80-\xBF]/','',$string));
}

/**
 * Unicode aware replacement for substr()
 *
 * @todo   Handle negative positions etc.
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    substr()
 * @param  string   $str
 * @param  int      $start  Number of characters to skip
 * @param  int|null $length Maximum number of characters to return (null = rest)
 * @return string|false false when $start is beyond the end of the string
 */
function utf8_substr($str, $start, $length=null){
  // cast to int so nothing but digits ends up inside the pattern
  if ( is_null($length) ) {
    $quantifier = '*';
  } else {
    $quantifier = '{0,'.(int) $length.'}';
  }
  $pattern = '/^.{'.(int) $start.'}(.'.$quantifier.')/us';
  preg_match($pattern, $str, $matches);

  if ( isset($matches[1]) ) {
    return $matches[1];
  }
  return false;
}

/**
 * Unicode aware replacement for explode
 *
 * @TODO   support third limit arg
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    explode();
 * @param  string $sep The delimiter
 * @param  string $str The string to split
 * @return array|false false (plus a warning) for an empty delimiter
 */
function utf8_explode($sep, $str) {
  if ( (string) $sep === '' ) {
    trigger_error('Empty delimiter',E_USER_WARNING);
    return FALSE;
  }

  return preg_split('!'.preg_quote($sep,'!').'!u',$str);
}

/**
 * Unicode aware replacement for str_replace()
 *
 * @todo   support PHP5 count (fourth arg)
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    str_replace();
 * @param  string|array $s   Search string(s)
 * @param  string|array $r   Replacement(s), passed to preg_replace as is
 * @param  string       $str The string to work on
 * @return string
 */
function utf8_str_replace($s,$r,$str){
  if(!is_array($s)){
    $s = '!'.preg_quote($s,'!').'!u';
  }else{
    foreach ($s as $k => $v) {
      $s[$k] = '!'.preg_quote($v,'!').'!u';
    }
  }
  return preg_replace($s,$r,$str);
}

/**
 * Unicode aware replacement for ltrim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    ltrim()
 * @param  string $str
 * @param  string $charlist Characters to strip, defaults to whitespace
 * @return string
 */
function utf8_ltrim($str,$charlist=''){
  if((string) $charlist === '') return ltrim($str);

  // quote charlist for use in a character class; preg_quote also takes
  // care of '^', ']' and '-' which would otherwise change the class
  $charlist = preg_quote($charlist,'/');

  return preg_replace('/^['.$charlist.']+/u','',$str);
}

/**
 * Unicode aware replacement for rtrim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    rtrim()
 * @param  string $str
 * @param  string $charlist Characters to strip, defaults to whitespace
 * @return string
 */
function utf8_rtrim($str,$charlist=''){
  if((string) $charlist === '') return rtrim($str);

  // quote charlist for use in a character class; preg_quote also takes
  // care of '^', ']' and '-' which would otherwise change the class
  $charlist = preg_quote($charlist,'/');

  return preg_replace('/['.$charlist.']+$/u','',$str);
}

/**
 * Unicode aware replacement for trim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    trim()
 * @param  string $str
 * @param  string $charlist Characters to strip, defaults to whitespace
 * @return string
 */
function utf8_trim($str,$charlist='') {
  if((string) $charlist === '') return trim($str);

  return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
}


/**
 * This is a unicode aware replacement for strtolower()
 *
 * Uses mb_string extension if available
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtolower()
 * @see    utf8_strtoupper()
 * @param  string $string
 * @return string
 */
function utf8_strtolower($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtolower'))
    return mb_strtolower($string,'utf-8');

  global $UTF8_UPPER_TO_LOWER;
  $uni = utf8_to_unicode($string);
  foreach ($uni as $i => $code){
    // code points without a mapping are kept as they are
    if(isset($UTF8_UPPER_TO_LOWER[$code])){
      $uni[$i] = $UTF8_UPPER_TO_LOWER[$code];
    }
  }
  return unicode_to_utf8($uni);
}

/**
 * This is a unicode aware replacement for strtoupper()
 *
 * Uses mb_string extension if available
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtoupper()
 * @see    utf8_strtolower()
 * @param  string $string
 * @return string
 */
function utf8_strtoupper($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtoupper'))
    return mb_strtoupper($string,'utf-8');

  global $UTF8_LOWER_TO_UPPER;
  $uni = utf8_to_unicode($string);
  foreach ($uni as $i => $code){
    // code points without a mapping are kept as they are
    if(isset($UTF8_LOWER_TO_UPPER[$code])){
      $uni[$i] = $UTF8_LOWER_TO_UPPER[$code];
    }
  }
  return unicode_to_utf8($uni);
}

/**
 * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
 *
 * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
 * letters. Default is to deaccent both cases ($case = 0)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string
 * @param  int    $case -1 lower case only, 1 upper case only, 0 both
 * @return string
 */
function utf8_deaccent($string,$case=0){
  if($case <= 0){
    global $UTF8_LOWER_ACCENTS;
    $string = str_replace(array_keys($UTF8_LOWER_ACCENTS),array_values($UTF8_LOWER_ACCENTS),$string);
  }
  if($case >= 0){
    global $UTF8_UPPER_ACCENTS;
    $string = str_replace(array_keys($UTF8_UPPER_ACCENTS),array_values($UTF8_UPPER_ACCENTS),$string);
  }
  return $string;
}

/**
 * Removes special characters (nonalphanumeric) from a UTF-8 string
 *
 * Be sure to specify all specialchars you give in $repl in $keep, too
 * or it won't work.
 *
 * This function adds the controlchars 0x00 to 0x19 to the array of
 * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string The UTF8 string to strip of special chars
 * @param  string $repl   Replace special with this string
 * @param  string $keep   Special chars to keep (in UTF8)
 * @return string
 */
function utf8_stripspecials($string,$repl='',$keep=''){
  global $UTF8_SPECIAL_CHARS;
  if($keep != ''){
    $specials = array_diff($UTF8_SPECIAL_CHARS, utf8_to_unicode($keep));
  }else{
    $specials = $UTF8_SPECIAL_CHARS;
  }

  // build one big character class out of the remaining code points
  $specials = unicode_to_utf8($specials);
  $specials = preg_quote($specials, '/');

  return preg_replace('/[\x00-\x19'.$specials.']/u',$repl,$string);
}

/**
 * This is an Unicode aware replacement for strpos
 *
 * Uses mb_string extension if available
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    strpos()
 * @param  string $haystack
 * @param  string $needle
 * @param  int    $offset Number of characters to skip before searching
 * @return int|false Character position of the first match or false
 */
function utf8_strpos($haystack, $needle,$offset=0) {
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos'))
    return mb_strpos($haystack,$needle,$offset,'utf-8');

  if(!$offset){
    // everything in front of the first separator is the prefix to measure
    $ar = utf8_explode($needle, $haystack);
    if ( is_array($ar) && count($ar) > 1 ) {
      return utf8_strlen($ar[0]);
    }
    return false;
  }else{
    if ( !is_int($offset) ) {
      trigger_error('Offset must be an integer',E_USER_WARNING);
      return false;
    }

    $rest = utf8_substr($haystack, $offset);
    if ( $rest === false ) {
      // offset lies behind the end of the string
      return false;
    }

    if ( false !== ($pos = utf8_strpos($rest,$needle))){
      return $pos + $offset;
    }
    return false;
  }
}

/**
 * This function returns any UTF-8 encoded text as a list of
 * Unicode values:
 *
 * Sequences of two, three and four bytes are decoded. The input is not
 * validated; feed it well formed UTF-8 only.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    unicode_to_utf8()
 * @param  string $str UTF-8 encoded string
 * @return array  List of code points (integers)
 */
function utf8_to_unicode( $str ) {
  $unicode    = array();
  $values     = array();  // bytes of the multi byte sequence being collected
  $lookingFor = 1;        // total number of bytes the current sequence needs
  $len        = strlen( $str );

  for ($i = 0; $i < $len; $i++ ) {
    $thisValue = ord( $str[ $i ] );
    if ( $thisValue < 128 ) {
      $unicode[] = $thisValue;
      continue;
    }

    if ( count( $values ) == 0 ) {
      // lead byte decides about the length of the sequence
      if ( $thisValue < 224 )     $lookingFor = 2;
      elseif ( $thisValue < 240 ) $lookingFor = 3;
      else                        $lookingFor = 4;
    }
    $values[] = $thisValue;

    if ( count( $values ) == $lookingFor ) {
      if ( $lookingFor == 4 ) {
        $number = ( ( $values[0] % 8 ) * 262144 ) + ( ( $values[1] % 64 ) * 4096 ) +
                  ( ( $values[2] % 64 ) * 64 ) + ( $values[3] % 64 );
      } elseif ( $lookingFor == 3 ) {
        $number = ( ( $values[0] % 16 ) * 4096 ) + ( ( $values[1] % 64 ) * 64 ) +
                  ( $values[2] % 64 );
      } else {
        $number = ( ( $values[0] % 32 ) * 64 ) + ( $values[1] % 64 );
      }
      $unicode[]  = $number;
      $values     = array();
      $lookingFor = 1;
    }
  }
  return $unicode;
}

/**
 * This function converts a Unicode array back to its UTF-8 representation
 *
 * Code points up to 0x10FFFF are encoded as one to four bytes.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    utf8_to_unicode()
 * @param  array  $str List of code points (integers)
 * @return string UTF-8 encoded string
 */
function unicode_to_utf8( $str ) {
  $utf8 = '';
  foreach( $str as $unicode ) {
    if ( $unicode < 128 ) {
      $utf8.= chr( $unicode );
    } elseif ( $unicode < 2048 ) {
      $utf8.= chr( 192 + ( $unicode >> 6 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    } elseif ( $unicode < 65536 ) {
      $utf8.= chr( 224 + ( $unicode >> 12 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    } else {
      $utf8.= chr( 240 + ( $unicode >> 18 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 12 ) & 63 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    }
  }
  return $utf8;
}

/**
 * UTF-8 Case lookup table
 *
 * This lookuptable maps the code points of lower case letters to the
 * code points of their corresponding upper case letters
 *
 * Declared global explicitly so the functions above find the table even
 * when this file gets included from within a function.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
global $UTF8_LOWER_TO_UPPER;
$UTF8_LOWER_TO_UPPER = array(
  0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
  0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
  0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
  0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
  0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
  0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
  0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
  0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
  0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
  0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
  0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
  0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
  0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
  0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
  0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
  0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
  0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
  0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
  0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
  0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
  0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
  0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
  0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
  0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
  0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
  0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
  0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
  0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
  0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
  0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
  0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
  0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
  0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
  0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
  0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
  0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
  0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
  0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
  0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
  0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
  0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
  0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
  0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
);

/**
 * UTF-8 Case lookup table
 *
 * This lookuptable maps the code points of upper case letters to the
 * code points of their corresponding lower case letters (it does so by
 * flipping $UTF8_LOWER_TO_UPPER)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
global $UTF8_UPPER_TO_LOWER;
$UTF8_UPPER_TO_LOWER = array_flip($UTF8_LOWER_TO_UPPER);

/**
 * UTF-8 lookup table for lower case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. These are lower case letters only.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_LOWER_ACCENTS;
$UTF8_LOWER_ACCENTS = array(
  'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
  'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
  'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
  'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o',
  'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
  'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
  'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
  'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
  'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
  'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
  'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
  'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
  'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
  'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
);

/**
 * UTF-8 lookup table for upper case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. These are upper case letters only - with the exception of the
 * sharp s, which is kept here in its only (lower case) form.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_UPPER_ACCENTS;
$UTF8_UPPER_ACCENTS = array(
  'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
  'ß' => 'Ss', 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
  'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
  'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O',
  'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
  'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
  'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
  'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
  'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
  'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
  'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
  'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
  'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
  'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
);

/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets - it's not a complete list of non-alphanum
 * characters in UTF-8. It's not perfect but should match most cases of special
 * chars.
 *
 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_stripspecials()
 */
global $UTF8_SPECIAL_CHARS;
$UTF8_SPECIAL_CHARS = array(
  0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
  0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d,
  0x002e, 0x002f, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
  0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
  0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
  0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
  0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
  0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
  0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
  0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
  0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
  0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
  0x0385, 0x0387, 0x03b2, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
  0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
  0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
  0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
  0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
  0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
  0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
  0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
  0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
  0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
  0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
  0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
  0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
  0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
  0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
  0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
  0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
  0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
  0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
  0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
  0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
  0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
  0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
  0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
  0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
  0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
  0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
  0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
  0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
  0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
  0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
  0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
  0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
  0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
  0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
  0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
  0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
  0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
);


//Setup VIM: ex: et ts=2 enc=utf-8 :