<?php
/**
 * UTF8 helper functions
 *
 * All functions operate on byte strings that are expected to hold UTF-8
 * encoded text. The lookup tables at the end of this file are plain global
 * variables which the functions access through the `global` keyword.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * URL-Encode a filename to allow unicode characters
 *
 * Slashes are not encoded.
 *
 * When the second parameter is true the string is returned untouched if it
 * consists only of the characters a-z, A-Z, 0-9, '/', '_', '-', '.' and '%'.
 * This makes it safe to run the function multiple times on the same string
 * (default is true).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file the filename to encode
 * @param  bool   $safe skip encoding for strings that already look encoded
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
  if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){
    return $file;
  }
  $file = urlencode($file);
  // urlencode() also encodes the path separator - restore it
  $file = str_replace('%2F','/',$file);
  return $file;
}

/**
 * URL-Decode a filename
 *
 * This is just a wrapper around urldecode
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file the encoded filename
 * @return string
 */
function utf8_decodeFN($file){
  return urldecode($file);
}

/**
 * Checks if a string contains 7bit ASCII only
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return bool   true when no byte above 0x7F is present
 */
function utf8_isASCII($str){
  // no /u modifier: the pattern is matched against single bytes
  return !preg_match('/[\x80-\xFF]/',$str);
}

/**
 * Strips all highbyte chars
 *
 * Returns a pure ASCII7 string
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return string the input without any byte above 0x7F
 */
function utf8_strip($str){
  // no /u modifier: the pattern is matched against single bytes
  return preg_replace('/[\x80-\xFF]/','',$str);
}

/**
 * Tries to detect if a string is in Unicode encoding
 *
 * Walks over the string and checks that every lead byte is followed by the
 * number of continuation bytes (10bbbbbb) it announces. Lead bytes for
 * sequences of up to six bytes are accepted.
 *
 * @author <bmorel@ssi.fr>
 * @link   http://www.php.net/manual/en/function.utf8-encode.php
 * @param  string $Str
 * @return bool
 */
function utf8_check($Str) {
  $len = strlen($Str);
  for ($i=0; $i<$len; $i++) {
    $byte = ord($Str[$i]);
    if ($byte < 0x80) continue;               # 0bbbbbbb
    elseif (($byte & 0xE0) == 0xC0) $n=1;     # 110bbbbb
    elseif (($byte & 0xF0) == 0xE0) $n=2;     # 1110bbbb
    elseif (($byte & 0xF8) == 0xF0) $n=3;     # 11110bbb
    elseif (($byte & 0xFC) == 0xF8) $n=4;     # 111110bb
    elseif (($byte & 0xFE) == 0xFC) $n=5;     # 1111110b
    else return false;                        # Does not match any model
    for ($j=0; $j<$n; $j++) {                 # n bytes matching 10bbbbbb follow ?
      if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
        return false;
    }
  }
  return true;
}

/**
 * Unicode aware replacement for strlen()
 *
 * Every UTF-8 character consists of exactly one byte that is not a
 * continuation byte (0x80-0xBF), so the character count equals the number of
 * bytes left after removing all continuation bytes.
 *
 * @author <chernyshevsky at hotmail dot com>
 * @see    strlen()
 * @param  string $string
 * @return int    number of characters
 */
function utf8_strlen($string){
  // no /u modifier: the pattern is matched against single bytes
  return strlen(preg_replace('/[\x80-\xBF]/','',$string));
}

/**
 * Unicode aware replacement for substr()
 *
 * @author lmak at NOSPAM dot iti dot gr
 * @link   http://www.php.net/manual/en/function.substr.php
 * @see    substr()
 * @param  string   $str
 * @param  int      $start  character offset, may be negative
 * @param  int|null $length number of characters, null for "up to the end"
 * @return string
 */
function utf8_substr($str,$start,$length=null){
  // the s modifier makes the dot match newlines as well
  if(!preg_match_all('/./us', $str, $ar)){
    // empty string or invalid UTF-8: nothing to slice
    return '';
  }

  // strict check: a length of 0 has to yield an empty string
  if($length !== null) {
    return join('',array_slice($ar[0],$start,$length));
  } else {
    return join('',array_slice($ar[0],$start));
  }
}

/**
 * Unicode aware replacement for explode
 *
 * @TODO   support third limit arg
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    explode();
 * @param  string $sep the separator, must not be empty
 * @param  string $str the string to split
 * @return array|false false (plus a warning) when the separator is empty
 */
function utf8_explode($sep, $str) {
  if ( $sep == '' ) {
    trigger_error('Empty delimiter',E_USER_WARNING);
    return FALSE;
  }

  return preg_split('!'.preg_quote($sep,'!').'!u',$str);
}

/**
 * Unicode aware replacement for str_replace()
 *
 * Search and replacement are both taken literally: the search strings are
 * quoted for the regular expression and '$' and '\' in the replacements are
 * escaped so they are not read as backreferences.
 *
 * @todo   support PHP5 count (fourth arg)
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    str_replace();
 * @param  string|array $s   search string(s)
 * @param  string|array $r   replacement string(s)
 * @param  string|array $str subject(s)
 * @return string|array
 */
function utf8_str_replace($s,$r,$str){
  if(!is_array($s)){
    $s = '!'.preg_quote($s,'!').'!u';
  }else{
    foreach ($s as $k => $v) {
      $s[$k] = '!'.preg_quote($v,'!').'!u';
    }
  }

  // neutralise backreference syntax in the replacement(s)
  $escape = array('\\' => '\\\\', '$' => '\\$');
  if(!is_array($r)){
    $r = strtr($r,$escape);
  }else{
    foreach ($r as $k => $v) {
      $r[$k] = strtr($v,$escape);
    }
  }

  return preg_replace($s,$r,$str);
}

/**
 * Unicode aware replacement for ltrim()
 *
 * Without a char list the native ltrim() is used.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    ltrim()
 * @param  string $str
 * @param  string $charlist characters to strip (ranges are not supported)
 * @return string
 */
function utf8_ltrim($str,$charlist=''){
  if($charlist == '') return ltrim($str);

  //quote charlist for use in a characterclass
  $charlist = preg_quote($charlist,'/');

  return preg_replace('/^['.$charlist.']+/u','',$str);
}

/**
 * Unicode aware replacement for rtrim()
 *
 * Without a char list the native rtrim() is used.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    rtrim()
 * @param  string $str
 * @param  string $charlist characters to strip (ranges are not supported)
 * @return string
 */
function utf8_rtrim($str,$charlist=''){
  if($charlist == '') return rtrim($str);

  //quote charlist for use in a characterclass
  $charlist = preg_quote($charlist,'/');

  return preg_replace('/['.$charlist.']+$/u','',$str);
}

/**
 * Unicode aware replacement for trim()
 *
 * Without a char list the native trim() is used.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    trim()
 * @param  string $str
 * @param  string $charlist characters to strip (ranges are not supported)
 * @return string
 */
function utf8_trim($str,$charlist='') {
  if($charlist == '') return trim($str);

  return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
}


/**
 * This is a unicode aware replacement for strtolower()
 *
 * Uses mb_string extension if available, unless the constant
 * UTF8_NOMBSTRING is defined. Otherwise the $UTF8_UPPER_TO_LOWER table
 * is used.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtolower()
 * @see    utf8_strtoupper()
 * @param  string $string
 * @return string
 */
function utf8_strtolower($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtolower'))
    return mb_strtolower($string,'utf-8');

  global $UTF8_UPPER_TO_LOWER;
  $uni = utf8_to_unicode($string);
  $cnt = count($uni);
  for ($i=0; $i < $cnt; $i++){
    // code points without a table entry are kept as they are
    if(isset($UTF8_UPPER_TO_LOWER[$uni[$i]])){
      $uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]];
    }
  }
  return unicode_to_utf8($uni);
}

/**
 * This is a unicode aware replacement for strtoupper()
 *
 * Uses mb_string extension if available, unless the constant
 * UTF8_NOMBSTRING is defined. Otherwise the $UTF8_LOWER_TO_UPPER table
 * is used.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtoupper()
 * @see    utf8_strtolower()
 * @param  string $string
 * @return string
 */
function utf8_strtoupper($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtoupper'))
    return mb_strtoupper($string,'utf-8');

  global $UTF8_LOWER_TO_UPPER;
  $uni = utf8_to_unicode($string);
  $cnt = count($uni);
  for ($i=0; $i < $cnt; $i++){
    // code points without a table entry are kept as they are
    if(isset($UTF8_LOWER_TO_UPPER[$uni[$i]])){
      $uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]];
    }
  }
  return unicode_to_utf8($uni);
}

/**
 * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
 *
 * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
 * letters. Default is to deaccent both cases ($case = 0)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string
 * @param  int    $case -1 lower case only, 1 upper case only, 0 both
 * @return string
 */
function utf8_deaccent($string,$case=0){
  if($case <= 0){
    global $UTF8_LOWER_ACCENTS;
    $string = str_replace(array_keys($UTF8_LOWER_ACCENTS),array_values($UTF8_LOWER_ACCENTS),$string);
  }
  if($case >= 0){
    global $UTF8_UPPER_ACCENTS;
    $string = str_replace(array_keys($UTF8_UPPER_ACCENTS),array_values($UTF8_UPPER_ACCENTS),$string);
  }
  return $string;
}

/**
 * Removes special characters (nonalphanumeric) from a UTF-8 string
 *
 * This function adds the controlchars 0x00 to 0x19 to the array of
 * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string The UTF8 string to strip of special chars
 * @param  string $repl   Replace special with this string
 * @return string
 */
function utf8_stripspecials($string,$repl=''){
  global $UTF8_SPECIAL_CHARS;

  // the quoted character class is built once and reused on later calls
  static $specials = null;
  if(is_null($specials)){
    $specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/');
  }

  return preg_replace('/[\x00-\x19'.$specials.']/u',$repl,$string);
}

/**
 * This is an Unicode aware replacement for strpos
 *
 * Uses mb_string extension if available, unless the constant
 * UTF8_NOMBSTRING is defined.
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    strpos()
 * @param  string $haystack
 * @param  string $needle
 * @param  int    $offset character offset to start searching from
 * @return int|false character position of the first match or false
 */
function utf8_strpos($haystack, $needle,$offset=0) {
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos'))
    return mb_strpos($haystack,$needle,$offset,'utf-8');

  if(!$offset){
    // everything in front of the first separator is the prefix whose
    // character length equals the position of the needle
    $ar = utf8_explode($needle, $haystack);
    if ( is_array($ar) && count($ar) > 1 ) {
      return utf8_strlen($ar[0]);
    }
    return false;
  }else{
    if ( !is_int($offset) ) {
      trigger_error('Offset must be an integer',E_USER_WARNING);
      return false;
    }

    $rest = utf8_substr($haystack, $offset);

    if ( false !== ($pos = utf8_strpos($rest,$needle))){
      return $pos + $offset;
    }
    return false;
  }
}

/**
 * This function returns any UTF-8 encoded text as a list of
 * Unicode values:
 *
 * Sequences of two, three and four bytes are decoded; the sequence length
 * is derived from the lead byte only, continuation bytes are not validated.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    unicode_to_utf8()
 * @param  string $str
 * @return array  list of integer code points
 */
function utf8_to_unicode( $str ) {
  $unicode = array();
  $values = array();
  $lookingFor = 1;
  $len = strlen( $str );

  for ($i = 0; $i < $len; $i++ ) {
    $thisValue = ord( $str[ $i ] );
    if ( $thisValue < 128 ) {
      $unicode[] = $thisValue;
      continue;
    }

    if ( count( $values ) == 0 ) {
      // first byte of a multi byte sequence announces its length
      if ( $thisValue < 224 )     $lookingFor = 2;
      elseif ( $thisValue < 240 ) $lookingFor = 3;
      else                        $lookingFor = 4;
    }
    $values[] = $thisValue;

    if ( count( $values ) == $lookingFor ) {
      if ( $lookingFor == 4 ) {
        $number = ( ( $values[0] % 8 ) * 262144 ) + ( ( $values[1] % 64 ) * 4096 )
                + ( ( $values[2] % 64 ) * 64 ) + ( $values[3] % 64 );
      } elseif ( $lookingFor == 3 ) {
        $number = ( ( $values[0] % 16 ) * 4096 ) + ( ( $values[1] % 64 ) * 64 )
                + ( $values[2] % 64 );
      } else {
        $number = ( ( $values[0] % 32 ) * 64 ) + ( $values[1] % 64 );
      }
      $unicode[] = $number;
      $values = array();
      $lookingFor = 1;
    }
  }
  return $unicode;
}

/**
 * This function converts a Unicode array back to its UTF-8 representation
 *
 * Code points below 0x80 become one byte, below 0x800 two bytes, below
 * 0x10000 three bytes and everything else four bytes.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    utf8_to_unicode()
 * @param  array  $str list of integer code points
 * @return string
 */
function unicode_to_utf8( $str ) {
  $utf8 = '';
  foreach( $str as $unicode ) {
    if ( $unicode < 128 ) {
      $utf8.= chr( $unicode );
    } elseif ( $unicode < 2048 ) {
      $utf8.= chr( 192 + ( $unicode >> 6 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    } elseif ( $unicode < 65536 ) {
      $utf8.= chr( 224 + ( $unicode >> 12 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    } else {
      $utf8.= chr( 240 + ( $unicode >> 18 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 12 ) & 63 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    }
  }
  return $utf8;
}

// The functions above reach the lookup tables through the `global` keyword.
// Declaring the names global here keeps that working when this file is
// included from inside a function; at the top level this is a no-op.
global $UTF8_LOWER_TO_UPPER, $UTF8_UPPER_TO_LOWER, $UTF8_LOWER_ACCENTS,
       $UTF8_UPPER_ACCENTS, $UTF8_SPECIAL_CHARS;

/**
 * UTF-8 Case lookup table
 *
 * This lookuptable maps the code points of lower case letters to the code
 * points of their corresponding upper case letters
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
$UTF8_LOWER_TO_UPPER = array(
  0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
  0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
  0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
  0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
  0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
  0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
  0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
  0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
  0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
  0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
  0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
  0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
  0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
  0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
  0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
  0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
  0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
  0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
  0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
  0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
  0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
  0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
  0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
  0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
  0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
  0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
  0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
  0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
  0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
  0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
  0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
  0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
  0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
  0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
  0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
  0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
  0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
  0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
  0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
  0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
  0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
  0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
  0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
);

/**
 * UTF-8 Case lookup table
 *
 * This lookuptable maps the code points of upper case letters to the code
 * points of their corresponding lower case letters (it does so by flipping
 * $UTF8_LOWER_TO_UPPER)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
$UTF8_UPPER_TO_LOWER = array_flip($UTF8_LOWER_TO_UPPER);

/**
 * UTF-8 lookup table for lower case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. These are lower case letters only.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
$UTF8_LOWER_ACCENTS = array(
  'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
  'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
  'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
  'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o',
  'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
  'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
  'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
  'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
  'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
  'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
  'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
  'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
  'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
  'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
);

/**
 * UTF-8 lookup table for upper case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. These are upper case letters only, so the lower case 'ß' is handled
 * by $UTF8_LOWER_ACCENTS alone.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
$UTF8_UPPER_ACCENTS = array(
  'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
  'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
  'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
  'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O',
  'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
  'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
  'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
  'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
  'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
  'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
  'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
  'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
  'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
  'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
);

/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets - it's not a complete list of non-alphanum
 * characters in UTF-8. It's not perfect but should match most cases of special
 * chars.
 *
 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
 * These chars are _not_ in the array either:  _ (0x5f), : 0x3a, . 0x2e, - 0x2d
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_stripspecials()
 */
$UTF8_SPECIAL_CHARS = array(
  0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
  0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c,
          0x002f,         0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
  0x005c, 0x005d, 0x005e,         0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
  0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
  0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
  0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
  0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
  0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
  0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
  0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
  0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
  0x0385, 0x0387, 0x03b2, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
  0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
  0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
  0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
  0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
  0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
  0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
  0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
  0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
  0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
  0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
  0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
  0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
  0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
  0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
  0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
  0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
  0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
  0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
  0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
  0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
  0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
  0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
  0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
  0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
  0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
  0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
  0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
  0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
  0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
  0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
  0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
  0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
  0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
  0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
  0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
  0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
  0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
);


//Setup VIM: ex: et ts=2 enc=utf-8 :