<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * URL-Encode a filename to allow unicode characters
 *
 * Slashes are not encoded.
 *
 * When the second parameter is true the string is returned unchanged
 * if it consists only of the characters a-z, A-Z, 0-9, '/', '_', '-',
 * '.' and '%' - this makes it safe to run the function multiple times
 * on the same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string $file the filename to encode
 * @param  bool   $safe skip encoding when the name looks already safe/encoded
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
  if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){
    return $file;
  }
  // urlencode() also encodes the path separator, so restore it afterwards
  return str_replace('%2F','/',urlencode($file));
}

/**
 * URL-Decode a filename
 *
 * This is just a wrapper around urldecode
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file the encoded filename
 * @return string
 */
function utf8_decodeFN($file){
  return urldecode($file);
}

/**
 * Checks if a string contains 7bit ASCII only
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return bool   true when no byte above 0x7F is present (also for '')
 */
function utf8_isASCII($str){
  // byte-wise match (no u modifier): any byte with the high bit set
  return !preg_match('/[\x80-\xFF]/',$str);
}
/**
 * Strips all highbyte chars
 *
 * Returns a pure ASCII7 string
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return string the input with every byte above 0x7F removed
 */
function utf8_strip($str){
  // byte-wise replace (no u modifier), so each high byte is dropped individually
  return preg_replace('/[\x80-\xFF]/','',$str);
}

/**
 * Tries to detect if a string is in Unicode encoding
 *
 * This is a structural check only: every lead byte has to be followed
 * by the announced number of continuation bytes (10bbbbbb). Lead bytes
 * announcing 5 and 6 byte sequences are accepted and no check for
 * overlong forms is made.
 *
 * @author <bmorel@ssi.fr>
 * @link   http://www.php.net/manual/en/function.utf8-encode.php
 * @param  string $Str
 * @return bool
 */
function utf8_check($Str) {
  $len = strlen($Str);
  for ($i=0; $i<$len; $i++) {
    $byte = ord($Str[$i]);
    if ($byte < 0x80) continue;               # 0bbbbbbb
    elseif (($byte & 0xE0) == 0xC0) $n=1;     # 110bbbbb
    elseif (($byte & 0xF0) == 0xE0) $n=2;     # 1110bbbb
    elseif (($byte & 0xF8) == 0xF0) $n=3;     # 11110bbb
    elseif (($byte & 0xFC) == 0xF8) $n=4;     # 111110bb
    elseif (($byte & 0xFE) == 0xFC) $n=5;     # 1111110b
    else return false;                        # Does not match any model
    for ($j=0; $j<$n; $j++) {                 # n bytes matching 10bbbbbb follow ?
      if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
        return false;
    }
  }
  return true;
}
/**
 * Unicode aware replacement for strlen()
 *
 * Counts characters by dropping all UTF-8 continuation bytes
 * (10bbbbbb) and measuring what is left, so each multibyte sequence
 * contributes exactly one byte. This avoids utf8_decode(), which is
 * deprecated in recent PHP versions.
 *
 * @author <chernyshevsky at hotmail dot com>
 * @see    strlen()
 * @param  string $string
 * @return int
 */
function utf8_strlen($string){
  return strlen(preg_replace('/[\x80-\xBF]/','',$string));
}

/**
 * Unicode aware replacement for substr()
 *
 * @author lmak at NOSPAM dot iti dot gr
 * @link   http://www.php.net/manual/en/function.substr.php
 * @see    substr()
 * @param  string   $str
 * @param  int      $start  character offset, negative counts from the end
 * @param  int|null $length number of characters, null for "up to the end"
 * @return string
 */
function utf8_substr($str,$start,$length=null){
  // the s modifier is needed - without it the dot skips newlines and
  // they would silently vanish from the result
  preg_match_all('/./su', $str, $ar);

  // strict check: a length of 0 has to yield an empty string
  if(!is_null($length)) {
    return join('',array_slice($ar[0],$start,$length));
  }
  return join('',array_slice($ar[0],$start));
}

/**
 * Unicode aware replacement for substr_replace()
 *
 * Unlike substr_replace() the default length is 0, which means the
 * replacement is inserted at $start without removing anything.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    substr_replace()
 * @param  string $string
 * @param  string $replacement
 * @param  int    $start  character offset
 * @param  int    $length number of characters to replace
 * @return string
 */
function utf8_substr_replace($string, $replacement, $start , $length=0 ){
  $ret = '';
  if($start>0) $ret .= utf8_substr($string, 0, $start);
  $ret .= $replacement;
  $ret .= utf8_substr($string, $start+$length);
  return $ret;
}

/**
 * Unicode aware replacement for explode
 *
 * @todo   support third limit arg
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    explode();
 * @param  string $sep the delimiter, must not be empty
 * @param  string $str
 * @return array|false false (plus an E_USER_WARNING) on an empty delimiter
 */
function utf8_explode($sep, $str) {
  if ( $sep == '' ) {
    trigger_error('Empty delimiter',E_USER_WARNING);
    return false;
  }

  return preg_split('!'.preg_quote($sep,'!').'!u',$str);
}

/**
 * Unicode aware replacement for str_replace()
 *
 * Search and replacement are both taken literally: the search is
 * quoted for the regexp and '\' and '$' in the replacement are escaped
 * so they are not interpreted as backreferences by preg_replace().
 *
 * @todo   support PHP5 count (fourth arg)
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    str_replace();
 * @param  string|array $s   search string(s)
 * @param  string|array $r   replacement string(s)
 * @param  string|array $str subject
 * @return string|array
 */
function utf8_str_replace($s,$r,$str){
  if(!is_array($s)){
    $s = '!'.preg_quote($s,'!').'!u';
  }else{
    foreach ($s as $k => $v) {
      $s[$k] = '!'.preg_quote($v,'!').'!u';
    }
  }

  $escape = array('\\' => '\\\\', '$' => '\\$');
  if(!is_array($r)){
    $r = strtr($r,$escape);
  }else{
    foreach ($r as $k => $v) {
      $r[$k] = strtr($v,$escape);
    }
  }

  return preg_replace($s,$r,$str);
}

/**
 * Unicode aware replacement for ltrim()
 *
 * Without a charlist this simply calls ltrim().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    ltrim()
 * @param  string $str
 * @param  string $charlist characters to strip, taken literally
 * @return string
 */
function utf8_ltrim($str,$charlist=''){
  if($charlist == '') return ltrim($str);

  //quote charlist for use in a characterclass ('^' would negate the class)
  $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\\${1}',$charlist);

  return preg_replace('/^['.$charlist.']+/u','',$str);
}

/**
 * Unicode aware replacement for rtrim()
 *
 * Without a charlist this simply calls rtrim().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    rtrim()
 * @param  string $str
 * @param  string $charlist characters to strip, taken literally
 * @return string
 */
function utf8_rtrim($str,$charlist=''){
  if($charlist == '') return rtrim($str);

  //quote charlist for use in a characterclass ('^' would negate the class)
  $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\\${1}',$charlist);

  return preg_replace('/['.$charlist.']+$/u','',$str);
}

/**
 * Unicode aware replacement for trim()
 *
 * Without a charlist this simply calls trim().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    trim()
 * @param  string $str
 * @param  string $charlist characters to strip, taken literally
 * @return string
 */
function utf8_trim($str,$charlist='') {
  if($charlist == '') return trim($str);

  // the charlist has to be handed on, otherwise only whitespace is trimmed
  return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
}


/**
 * This is a unicode aware replacement for strtolower()
 *
 * Uses mb_string extension if available, otherwise the
 * $UTF8_UPPER_TO_LOWER lookup table is used.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtolower()
 * @see    utf8_strtoupper()
 * @param  string $string
 * @return string
 */
function utf8_strtolower($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtolower'))
    return mb_strtolower($string,'utf-8');

  global $UTF8_UPPER_TO_LOWER;
  $uni = utf8_to_unicode($string);
  $cnt = count($uni);
  for ($i=0; $i < $cnt; $i++){
    // isset avoids notices for codepoints without a mapping
    if(isset($UTF8_UPPER_TO_LOWER[$uni[$i]])){
      $uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]];
    }
  }
  return unicode_to_utf8($uni);
}

/**
 * This is a unicode aware replacement for strtoupper()
 *
 * Uses mb_string extension if available, otherwise the
 * $UTF8_LOWER_TO_UPPER lookup table is used.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtoupper()
 * @see    utf8_strtolower()
 * @param  string $string
 * @return string
 */
function utf8_strtoupper($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtoupper'))
    return mb_strtoupper($string,'utf-8');

  global $UTF8_LOWER_TO_UPPER;
  $uni = utf8_to_unicode($string);
  $cnt = count($uni);
  for ($i=0; $i < $cnt; $i++){
    // isset avoids notices for codepoints without a mapping
    if(isset($UTF8_LOWER_TO_UPPER[$uni[$i]])){
      $uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]];
    }
  }
  return unicode_to_utf8($uni);
}

/**
 * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
 *
 * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
 * letters. Default is to deaccent both cases ($case = 0)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string
 * @param  int    $case -1 lower case only, 1 upper case only, 0 both
 * @return string
 */
function utf8_deaccent($string,$case=0){
  if($case <= 0){
    global $UTF8_LOWER_ACCENTS;
    $string = str_replace(array_keys($UTF8_LOWER_ACCENTS),array_values($UTF8_LOWER_ACCENTS),$string);
  }
  if($case >= 0){
    global $UTF8_UPPER_ACCENTS;
    $string = str_replace(array_keys($UTF8_UPPER_ACCENTS),array_values($UTF8_UPPER_ACCENTS),$string);
  }
  return $string;
}
/**
 * Removes special characters (nonalphanumeric) from a UTF-8 string
 *
 * This function adds the controlchars 0x00 to 0x19 to the array of
 * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string     The UTF8 string to strip of special chars
 * @param  string $repl       Replace special with this string
 * @param  string $additional Additional chars to strip (used in regexp char class,
 *                            inserted as is - the caller has to quote it)
 * @return string
 */
function utf8_stripspecials($string,$repl='',$additional=''){
  global $UTF8_SPECIAL_CHARS;

  // the quoted character class is built once per request only
  static $specials = null;
  if(is_null($specials)){
    $specials = preg_quote(unicode_to_utf8($UTF8_SPECIAL_CHARS), '/');
  }

  return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
}

/**
 * This is an Unicode aware replacement for strpos
 *
 * Uses mb_string extension if available
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    strpos()
 * @param  string $haystack
 * @param  string $needle
 * @param  int    $offset character offset to start searching at
 * @return int|false character position of the first match or false
 */
function utf8_strpos($haystack, $needle,$offset=0) {
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos'))
    return mb_strpos($haystack,$needle,$offset,'utf-8');

  if(!$offset){
    // everything in front of the first separator is the offset we look for
    $ar = utf8_explode($needle, $haystack);
    if ( is_array($ar) && count($ar) > 1 ) {
      return utf8_strlen($ar[0]);
    }
    return false;
  }

  if ( !is_int($offset) ) {
    trigger_error('Offset must be an integer',E_USER_WARNING);
    return false;
  }

  // search in the remainder and shift the result back by the offset
  $haystack = utf8_substr($haystack, $offset);

  if ( false !== ($pos = utf8_strpos($haystack,$needle))){
    return $pos + $offset;
  }
  return false;
}
/**
 * Encodes UTF-8 characters to HTML entities
 *
 * Every well formed 2, 3 or 4 byte sequence is replaced by its numeric
 * entity (&#NNN;). ASCII and bytes that do not form a complete sequence
 * are copied unchanged.
 *
 * @author <vpribish at shopping dot com>
 * @link   http://www.php.net/manual/en/function.utf8-decode.php
 * @param  string $str
 * @return string
 */
function utf8_tohtml ($str) {
  $ret  = '';
  $max  = strlen($str);
  $last = 0; // index of the first byte not yet copied to $ret
  for ($i=0; $i<$max; $i++) {
    $c1 = ord($str[$i]);
    if ($c1 < 0x80) continue; // regular character

    // lead byte: number of continuation bytes and the payload bits
    if     (($c1 & 0xE0) == 0xC0) { $n = 1; $cp = $c1 & 0x1F; } // 110x xxxx
    elseif (($c1 & 0xF0) == 0xE0) { $n = 2; $cp = $c1 & 0x0F; } // 1110 xxxx
    elseif (($c1 & 0xF8) == 0xF0) { $n = 3; $cp = $c1 & 0x07; } // 1111 0xxx
    else continue; // no valid lead byte, leave it alone

    if ($i + $n >= $max) continue; // sequence is cut off at the end

    $valid = true;
    for ($j=1; $j<=$n; $j++) {
      $cx = ord($str[$i+$j]);
      if (($cx & 0xC0) != 0x80) { $valid = false; break; } // not 10xx xxxx
      $cp = ($cp << 6) | ($cx & 0x3F);
    }
    if (!$valid) continue;

    // append all the regular characters we've passed, then the entity
    $ret .= substr($str, $last, $i-$last) . '&#' . $cp . ';';
    $i   += $n;
    $last = $i+1;
  }
  return $ret . substr($str, $last); // append the last batch of regular characters
}

/**
 * This function returns any UTF-8 encoded text as a list of
 * Unicode values:
 *
 * Sequences of up to 4 bytes are decoded. The input is not validated.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    unicode_to_utf8()
 * @param  string $str
 * @return array  list of integer codepoints
 */
function utf8_to_unicode( $str ) {
  $unicode = array();
  $values = array();
  $lookingFor = 1;
  $len = strlen( $str );

  for ($i = 0; $i < $len; $i++ ) {
    $thisValue = ord( $str[ $i ] );
    if ( $thisValue < 128 ) {
      $unicode[] = $thisValue;
      continue;
    }

    // first byte of a sequence tells how many bytes to collect
    if ( count( $values ) == 0 ) {
      if     ( $thisValue < 224 ) $lookingFor = 2;
      elseif ( $thisValue < 240 ) $lookingFor = 3;
      else                        $lookingFor = 4;
    }
    $values[] = $thisValue;

    if ( count( $values ) == $lookingFor ) {
      // payload of the lead byte: 5, 4 or 3 bits
      if     ( $lookingFor == 2 ) $number = $values[0] % 32;
      elseif ( $lookingFor == 3 ) $number = $values[0] % 16;
      else                        $number = $values[0] % 8;
      // every following byte adds 6 bits
      for ( $j = 1; $j < $lookingFor; $j++ ) {
        $number = ( $number * 64 ) + ( $values[$j] % 64 );
      }
      $unicode[] = $number;
      $values = array();
      $lookingFor = 1;
    }
  }
  return $unicode;
}

/**
 * This function converts a Unicode array back to its UTF-8 representation
 *
 * Codepoints up to 0x10FFFF (4 byte sequences) are supported.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    utf8_to_unicode()
 * @param  array  $str list of integer codepoints
 * @return string
 */
function unicode_to_utf8( $str ) {
  $utf8 = '';
  foreach( $str as $unicode ) {
    if ( $unicode < 128 ) {
      $utf8.= chr( $unicode );
    } elseif ( $unicode < 2048 ) {
      $utf8.= chr( 192 + ( $unicode >> 6 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    } elseif ( $unicode < 65536 ) {
      $utf8.= chr( 224 + ( $unicode >> 12 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    } else {
      $utf8.= chr( 240 + ( ( $unicode >> 18 ) & 7 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 12 ) & 63 ) );
      $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 + ( $unicode & 63 ) );
    }
  }
  return $utf8;
}

/**
 * UTF-8 to UTF-16BE conversion.
 *
 * Uses mb_string extension if available. The fallback encodes
 * codepoints above 0xFFFF as surrogate pairs.
 *
 * @param  string $str UTF-8 input
 * @param  bool   $bom prepend the big endian byte order mark
 * @return string
 */
function utf8_to_utf16be($str, $bom = false) {
  $out = $bom ? "\xFE\xFF" : '';
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_convert_encoding'))
    return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

  $uni = utf8_to_unicode($str);
  foreach($uni as $cp){
    if($cp >= 0x10000){
      $cp  -= 0x10000;
      $out .= pack('n',0xD800 | ($cp >> 10));
      $out .= pack('n',0xDC00 | ($cp & 0x3FF));
    }else{
      $out .= pack('n',$cp);
    }
  }
  return $out;
}

/**
 * UTF-16BE to UTF-8 conversion.
 *
 * Surrogate pairs are combined into a single codepoint. A byte order
 * mark is not removed.
 *
 * @param  string $str UTF-16BE input
 * @return string
 */
function utf16be_to_utf8($str) {
  $units = unpack('n*',$str);
  if(!is_array($units)) return '';
  $units = array_values($units); // unpack() starts counting at 1

  $uni = array();
  $cnt = count($units);
  for($i=0; $i<$cnt; $i++){
    $cp = $units[$i];
    if($cp >= 0xD800 && $cp <= 0xDBFF && $i+1 < $cnt &&
       $units[$i+1] >= 0xDC00 && $units[$i+1] <= 0xDFFF){
      $cp = 0x10000 + (($cp - 0xD800) << 10) + ($units[$i+1] - 0xDC00);
      $i++;
    }
    $uni[] = $cp;
  }
  return unicode_to_utf8($uni);
}

/**
 * UTF-8 Case lookup table
 *
 * This lookuptable defines the lower case letters to their corresponding
 * upper case letter in UTF-8
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
global $UTF8_LOWER_TO_UPPER; // keeps the table global when included from a function
$UTF8_LOWER_TO_UPPER = array(
  0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
  0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
  0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
  0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
  0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
  0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
  0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
  0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
  0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
  0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
  0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
  0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
  0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
  0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
  0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
  0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
  0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
  0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
  0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
  0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
  0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
  0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
  0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
  0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
  0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
  0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
  0x044C=>0x042C, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
  0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
  0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
  0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
  0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
  0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
  0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
  0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
  0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
  0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
  0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
  0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
  0x0446=>0x0426, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
  0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
  0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
  0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
  0x0123=>0x0122,
);

/**
 * UTF-8 Case lookup table
 *
 * This lookuptable defines the upper case letters to their corresponding
 * lower case letter in UTF-8 (it does so by flipping $UTF8_LOWER_TO_UPPER)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
global $UTF8_UPPER_TO_LOWER;
$UTF8_UPPER_TO_LOWER = array_flip($UTF8_LOWER_TO_UPPER);
/**
 * UTF-8 lookup table for lower case accented letters
 *
 * This lookuptable defines replacements for accented characters by
 * characters from the ASCII-7 range. These are lower case letters only.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_LOWER_ACCENTS; // keeps the table global when included from a function
$UTF8_LOWER_ACCENTS = array(
  'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
  'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
  'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
  'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o',
  'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
  'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
  'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
  'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
  'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
  'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
  'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
  'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
  'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
  'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
);
/**
 * UTF-8 lookup table for upper case accented letters
 *
 * This lookuptable defines replacements for accented characters by
 * characters from the ASCII-7 range. These are upper case letters only.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_UPPER_ACCENTS; // keeps the table global when included from a function
$UTF8_UPPER_ACCENTS = array(
  'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
  'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
  'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
  'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O',
  'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
  'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
  'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
  'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
  'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
  'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
  'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
  'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
  'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
  'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
);

/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets - it's not a complete list of non-alphanum
 * characters in UTF-8. It's not perfect but should match most cases of special
 * chars.
 *
 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
 * These chars are _not_ in the array either:  _ (0x5f), : 0x3a, . 0x2e, - 0x2d
 *
 * NOTE(review): 0x03b2 and 0x03c6 are the Greek letters beta and phi - they
 * look out of place in a list of non-letters, confirm they are intended.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_stripspecials()
 */
global $UTF8_SPECIAL_CHARS; // keeps the table global when included from a function
$UTF8_SPECIAL_CHARS = array(
  0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
  0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c,
          0x002f,         0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
  0x005c, 0x005d, 0x005e,         0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
  0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
  0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
  0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
  0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
  0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
  0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
  0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
  0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
  0x0385, 0x0387, 0x03b2, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
  0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
  0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
  0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
  0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
  0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
  0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
  0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
  0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
  0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
  0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
  0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
  0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
  0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
  0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
  0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
  0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
  0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
  0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
  0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
  0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
  0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
  0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
  0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
  0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
  0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
  0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
  0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
  0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
  0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
  0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
  0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
  0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
  0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
  0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
  0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
  0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
  0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
);


//Setup VIM: ex: et ts=2 enc=utf-8 :