<?php

require_once(HTML2PS_DIR.'encoding.inc.php');
require_once(HTML2PS_DIR.'encoding.entities.inc.php');
require_once(HTML2PS_DIR.'encoding.glyphs.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-1.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-2.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-3.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-4.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-5.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-6.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-7.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-8.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-9.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-10.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-11.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-13.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-14.inc.php');
require_once(HTML2PS_DIR.'encoding.iso-8859-15.inc.php');
require_once(HTML2PS_DIR.'encoding.koi8-r.inc.php');
require_once(HTML2PS_DIR.'encoding.cp866.inc.php');
require_once(HTML2PS_DIR.'encoding.windows-1250.inc.php');
require_once(HTML2PS_DIR.'encoding.windows-1251.inc.php');
require_once(HTML2PS_DIR.'encoding.windows-1252.inc.php');
require_once(HTML2PS_DIR.'encoding.dingbats.inc.php');
require_once(HTML2PS_DIR.'encoding.symbol.inc.php');

// TODO: this works for PS encoding names only

/**
 * Registry of single-byte encoding vectors.
 *
 * Combines the predefined converters found in the global
 * $g_utf8_converters table with dynamically created "customN"
 * vectors, and maintains a UTF-8 character => (encoding => code)
 * lookup table that is cached on disk in serialized form.
 */
class ManagerEncoding {
  /**
   * Encoding vectors registered at runtime (custom vectors included),
   * indexed by encoding name.
   */
  var $_encodings = array();

  /**
   * Number of the current custom encoding vector
   */
  var $_custom_vector_index = 0;

  /**
   * UTF-8 character => array(encoding name => single-byte code).
   * Stays unset until load_mapping()/generate_mapping() runs or a
   * custom character is added.
   */
  var $_utf8_mapping;

  /**
   * Create the manager and allocate the first custom encoding vector.
   */
  function __construct() {
    $this->new_custom_encoding_vector();
  }

  /**
   * Old-style (class-named) constructor.
   *
   * PHP 8 no longer treats a method named after the class as the
   * constructor, so the real initialisation lives in __construct().
   * This method is kept so that existing explicit calls keep working.
   */
  function ManagerEncoding() {
    $this->__construct();
  }

  /**
   * Add new custom symbol not present in the existing encoding
   * vectors.
   *
   * Note: encoding vector this character was placed to should be
   * extracted via get_current_custom_encoding_name immediately after
   * add_custom_char call.
   *
   * @param char[2] $char UCS-2 character (represented as 2-octet
   * string)
   *
   * @return char index of this character in custom encoding vector
   */
  function add_custom_char($char) {
    // Check if current encoding vector is full; if it is, we should
    // add a new one.
    if ($this->is_custom_encoding_full()) {
      $this->new_custom_encoding_vector();
    }

    // Get name of the custom encoding where new character should be
    // placed
    $vector_name = $this->get_current_custom_encoding_name();

    // Get (zero-based) index of this character in the encoding vector
    $index = count($this->_encodings[$vector_name]);
    $code  = chr($index);

    // Add new character to the custom encoding vector
    $this->_encodings[$vector_name][$code] = $char;

    // Add new character to the UTF8 mapping table
    $this->_utf8_mapping[code_to_utf8($char)][$vector_name] = $code;

    return $code;
  }

  /**
   * Rebuild the UTF-8 mapping table from $g_utf8_converters and write
   * it, serialized, to the given file.
   *
   * If the file cannot be opened for writing the failure is logged and
   * the freshly built in-memory table is still kept.
   *
   * @param string $mapping_file Path of the cache file to (over)write
   */
  function generate_mapping($mapping_file) {
    global $g_utf8_converters;

    $this->_utf8_mapping = array();
    foreach (array_keys($g_utf8_converters) as $encoding) {
      // Element [0] of a converter maps code => character; flip it to
      // iterate character => code.
      $flipped = array_flip($g_utf8_converters[$encoding][0]);
      foreach ($flipped as $utf => $code) {
        $this->_utf8_mapping[code_to_utf8($utf)][$encoding] = $code;
      }
    }

    $file = fopen($mapping_file, 'w');
    if ($file === false) {
      // fwrite()/fclose() on a failed handle raise a TypeError on PHP 8
      error_log(sprintf("Cannot open mapping file '%s' for writing", $mapping_file));
      return;
    }

    fwrite($file, serialize($this->_utf8_mapping));
    fclose($file);
  }

  /**
   * Return (by reference) the manager stored in the global
   * $g_manager_encodings variable.
   *
   * Declared static because it does not use $this; PHP 8 rejects
   * static calls (ManagerEncoding::get()) to non-static methods, while
   * a static method may still be invoked through an instance.
   */
  static function &get() {
    global $g_manager_encodings;
    return $g_manager_encodings;
  }

  /**
   * Resolve an encoding alias through $g_encoding_aliases.
   *
   * @param string $encoding Encoding name or alias
   *
   * @return string Aliased name if one is registered, otherwise the
   * name that was passed in
   */
  function get_canonized_encoding_name($encoding) {
    global $g_encoding_aliases;

    if (isset($g_encoding_aliases[$encoding])) {
      return $g_encoding_aliases[$encoding];
    }

    return $encoding;
  }

  /**
   * Name of the custom encoding vector currently being filled.
   */
  function get_current_custom_encoding_name() {
    return $this->get_custom_encoding_name($this->get_custom_vector_index());
  }

  /**
   * Build the name ('customN') of the custom encoding with the given
   * index.
   */
  function get_custom_encoding_name($index) {
    return sprintf('custom%d', $index);
  }

  /**
   * Index of the custom encoding vector currently being filled.
   */
  function get_custom_vector_index() {
    return $this->_custom_vector_index;
  }

  /**
   * Get the glyph names for every code of an encoding.
   *
   * @param string $encoding Encoding name
   *
   * @return Array code => glyph name (see vector_to_glyphs); null, after
   * logging an error, if the encoding is not known
   */
  function get_encoding_glyphs($encoding) {
    $vector = $this->get_encoding_vector($encoding);
    if (is_null($vector)) {
      error_log(sprintf("Cannot get encoding vector for encoding '%s'", $encoding));
      return null;
    }

    return $this->vector_to_glyphs($vector);
  }

  /**
   * Get an encoding vector (array containing 256 elements; every
   * element is an ucs-2 encoded character)
   *
   * Predefined converters from $g_utf8_converters take precedence over
   * vectors registered on this object. Codes missing from the source
   * vector are filled with 0xFFFF.
   *
   * @param $encoding Encoding name
   *
   * @return Array encoding vector; null if this encoding is not known to the script
   */
  function get_encoding_vector($encoding) {
    global $g_utf8_converters;

    $encoding = $this->get_canonized_encoding_name($encoding);

    if (isset($g_utf8_converters[$encoding])) {
      $vector = $g_utf8_converters[$encoding][0];
    } elseif (isset($this->_encodings[$encoding])) {
      $vector = $this->_encodings[$encoding];
    } else {
      return null;
    }

    for ($i = 0; $i <= 255; $i++) {
      $code = chr($i);
      if (!isset($vector[$code])) {
        $vector[$code] = 0xFFFF;
      }
    }

    return $vector;
  }

  /**
   * Build the reverse glyph table of an encoding.
   *
   * @param string $encoding Encoding name
   *
   * @return Array glyph name => list of codes mapped to that glyph;
   * characters without an entry in $g_unicode_glyphs are skipped. An
   * empty array is returned for an unknown encoding.
   */
  function get_glyph_to_code_mapping($encoding) {
    $result = array();

    $vector = $this->get_encoding_vector($encoding);
    if (is_null($vector)) {
      // Unknown encoding: nothing to iterate over
      return $result;
    }

    foreach ($vector as $code => $uccode) {
      if (isset($GLOBALS['g_unicode_glyphs'][$uccode])) {
        $result[$GLOBALS['g_unicode_glyphs'][$uccode]][] = $code;
      }
    }

    return $result;
  }

  /**
   * Look up the encodings able to represent a UTF-8 character.
   *
   * The mapping table is loaded lazily from
   * CACHE_DIR.'utf8.mappings.dat' if it has not been set yet.
   *
   * @param string $char UTF-8 encoded character
   *
   * @return Array encoding name => code; null if the character is not
   * present in the mapping table
   */
  function get_mapping($char) {
    if (!isset($this->_utf8_mapping)) {
      $this->load_mapping(CACHE_DIR . 'utf8.mappings.dat');
    }

    if (!isset($this->_utf8_mapping[$char])) {
      return null;
    }

    return $this->_utf8_mapping[$char];
  }

  /**
   * Extract the UTF-8 sequence starting at $ptr and advance $ptr past
   * it.
   *
   * The sequence length (1-4 bytes) is derived from the high bits of
   * the lead byte only; continuation bytes are not validated.
   *
   * @param string $raw_content UTF-8 encoded data
   * @param int $ptr Byte offset of the character; updated in place
   *
   * @return string Bytes making up the character
   */
  function get_next_utf8_char($raw_content, &$ptr) {
    $lead = ord($raw_content[$ptr]);

    // Order matters: the wider masks must be tested first
    if (($lead & 0xF0) == 0xF0) {
      $charlen = 4;
    } elseif (($lead & 0xE0) == 0xE0) {
      $charlen = 3;
    } elseif (($lead & 0xC0) == 0xC0) {
      $charlen = 2;
    } else {
      $charlen = 1;
    }

    $char = substr($raw_content, $ptr, $charlen);
    $ptr += $charlen;

    return $char;
  }

  /**
   * Render an encoding as a PostScript encoding array definition.
   *
   * Codes whose character has no entry in $g_unicode_glyphs (or every
   * code, if the encoding is unknown) are emitted as /.notdef.
   *
   * @param string $encoding Encoding name; also used as the PostScript
   * name of the array
   *
   * @return string PostScript source
   */
  function get_ps_encoding_vector($encoding) {
    $vector = $this->get_encoding_vector($encoding);

    $result = "/".$encoding." [ \n";
    for ($i = 0; $i < 256; $i++) {
      // Ten glyph names per output line
      if ($i % 10 == 0) {
        $result .= "\n";
      }

      $code = chr($i);

      // ! Note the order of array checking; optimizing interpreters may break this
      if (isset($vector[$code]) && isset($GLOBALS['g_unicode_glyphs'][$vector[$code]])) {
        $result .= " /".$GLOBALS['g_unicode_glyphs'][$vector[$code]];
      } else {
        $result .= " /.notdef";
      }
    }
    $result .= " ] readonly def";

    return $result;
  }

  /**
   * Check whether an encoding name has the 'customN' form.
   *
   * @return int|false Raw preg_match() result (1 on match, 0 otherwise)
   */
  function is_custom_encoding($encoding) {
    return preg_match('/^custom\d+$/', $encoding);
  }

  /**
   * Check whether the current custom vector already holds 256 entries.
   */
  function is_custom_encoding_full() {
    return count($this->_encodings[$this->get_current_custom_encoding_name()]) >= 256;
  }

  /**
   * Load the UTF-8 mapping table from its cache file, regenerating the
   * table (and the file) when the file is unreadable or does not
   * contain a serialized array.
   *
   * NOTE(review): the file is passed to unserialize(); it is expected
   * to be the cache written by generate_mapping(), never untrusted data.
   *
   * @param string $mapping_file Path of the cache file
   */
  function load_mapping($mapping_file) {
    if (is_readable($mapping_file)) {
      $mapping = unserialize(file_get_contents($mapping_file));
      if (is_array($mapping)) {
        $this->_utf8_mapping = $mapping;
        return;
      }
      // Corrupt/truncated cache: fall through and rebuild it
    }

    $this->generate_mapping($mapping_file);
  }

  /**
   * Create new custom 256-characters encoding vector. Codes 0..32
   * (inclusive, i.e. 33 entries) are reserved for system use and
   * pre-filled with the corresponding single-byte character.
   *
   * Custom encoding vectors have names 'customX' when X stand for the
   * encoding index.
   */
  function new_custom_encoding_vector() {
    $initial_vector = array();
    for ($i = 0; $i <= 32; $i++) {
      $initial_vector[chr($i)] = chr($i);
    }

    $this->register_encoding($this->get_custom_encoding_name($this->next_custom_vector_index()),
                             $initial_vector);
  }

  /**
   * Returns index for the next custom encoding
   * (the internal counter is incremented as a side effect).
   */
  function next_custom_vector_index() {
    return ++$this->_custom_vector_index;
  }

  /**
   * Store (or replace) an encoding vector under the given name.
   *
   * @param string $name Encoding name
   * @param Array $vector code => character
   */
  function register_encoding($name, $vector) {
    $this->_encodings[$name] = $vector;
  }

  /**
   * Convert a single-byte encoded string to UTF-8.
   *
   * @param string $word Data in the given encoding
   * @param string $encoding Encoding name
   *
   * @return string Concatenation of code_to_utf8() results for every
   * byte of $word
   */
  function to_utf8($word, $encoding) {
    $vector = $this->get_encoding_vector($encoding);

    $converted = '';
    for ($i = 0, $size = strlen($word); $i < $size; $i++) {
      // Square brackets: the $word{$i} form is a parse error on PHP 8
      $converted .= code_to_utf8($vector[$word[$i]]);
    }

    return $converted;
  }

  /**
   * Translate an encoding vector to glyph names.
   *
   * @param Array $vector code => character
   *
   * @return Array code => glyph name: the $g_unicode_glyphs entry when
   * there is one, ".notdef" for 0xFFFF, "uXXXX" otherwise
   */
  function vector_to_glyphs($vector) {
    $result = array();

    foreach ($vector as $code => $ucs2) {
      if (isset($GLOBALS['g_unicode_glyphs'][$ucs2])) {
        $result[$code] = $GLOBALS['g_unicode_glyphs'][$ucs2];
      } elseif ($ucs2 == 0xFFFF) {
        $result[$code] = ".notdef";
      } else {
        // Use "Unicode and Glyph Names" mapping from Adobe
        // http://partners.adobe.com/public/developer/opentype/index_glyph.html
        $result[$code] = sprintf("u%04X", $ucs2);
      }
    }

    return $result;
  }
}

global $g_manager_encodings;
$g_manager_encodings = new ManagerEncoding;
?>