1<?php
2
3require_once(HTML2PS_DIR.'encoding.inc.php');
4require_once(HTML2PS_DIR.'encoding.entities.inc.php');
5require_once(HTML2PS_DIR.'encoding.glyphs.inc.php');
6require_once(HTML2PS_DIR.'encoding.iso-8859-1.inc.php');
7require_once(HTML2PS_DIR.'encoding.iso-8859-2.inc.php');
8require_once(HTML2PS_DIR.'encoding.iso-8859-3.inc.php');
9require_once(HTML2PS_DIR.'encoding.iso-8859-4.inc.php');
10require_once(HTML2PS_DIR.'encoding.iso-8859-5.inc.php');
11require_once(HTML2PS_DIR.'encoding.iso-8859-6.inc.php');
12require_once(HTML2PS_DIR.'encoding.iso-8859-7.inc.php');
13require_once(HTML2PS_DIR.'encoding.iso-8859-8.inc.php');
14require_once(HTML2PS_DIR.'encoding.iso-8859-9.inc.php');
15require_once(HTML2PS_DIR.'encoding.iso-8859-10.inc.php');
16require_once(HTML2PS_DIR.'encoding.iso-8859-11.inc.php');
17require_once(HTML2PS_DIR.'encoding.iso-8859-13.inc.php');
18require_once(HTML2PS_DIR.'encoding.iso-8859-14.inc.php');
19require_once(HTML2PS_DIR.'encoding.iso-8859-15.inc.php');
20require_once(HTML2PS_DIR.'encoding.koi8-r.inc.php');
21require_once(HTML2PS_DIR.'encoding.cp866.inc.php');
22require_once(HTML2PS_DIR.'encoding.windows-1250.inc.php');
23require_once(HTML2PS_DIR.'encoding.windows-1251.inc.php');
24require_once(HTML2PS_DIR.'encoding.windows-1252.inc.php');
25require_once(HTML2PS_DIR.'encoding.dingbats.inc.php');
26require_once(HTML2PS_DIR.'encoding.symbol.inc.php');
27
28// TODO: this works for PS encoding names only
class ManagerEncoding {
  /**
   * Encoding vectors registered at runtime via register_encoding(),
   * keyed by encoding name (custom vectors are named 'customN').
   */
  var $_encodings = array();

  /**
   * Number of the current custom encoding vector
   */
  var $_custom_vector_index = 0;

  /**
   * Lookup table: UTF-8 character => array(encoding name => code).
   * Stays unset until load_mapping(), generate_mapping() or
   * add_custom_char() writes to it.
   */
  var $_utf8_mapping;

  /**
   * Create the manager and register the first custom encoding vector
   * ('custom1').
   */
  function __construct() {
    $this->new_custom_encoding_vector();
  }

  /**
   * Legacy (PHP 4 style) constructor name. PHP 8 no longer treats a
   * method named after the class as a constructor, so the real
   * initialisation lives in __construct(); this method is kept only
   * for code that invokes it explicitly.
   */
  function ManagerEncoding() {
    $this->__construct();
  }

  /**
   * Add  new  custom symbol  not  present  in  the existing  encoding
   * vectors.
   *
   * Note:  encoding vector  this character  was placed  to  should be
   * extracted via  get_current_custom_encoding_name immediately after
   * add_custom_char call.
   *
   * @param  char[2]  $char UCS-2  character  (represented as  2-octet
   * string)
   *
   * @return char index of this character in custom encoding vector
   */
  function add_custom_char($char) {
    // Check if current  encoding vector is full; if  it is, we should
    // add a new one.
    if ($this->is_custom_encoding_full()) {
      $this->new_custom_encoding_vector();
    }

    // Get name of  the custom encoding where new  character should be
    // placed
    $vector_name = $this->get_current_custom_encoding_name();

    // The next free slot equals the number of entries already stored
    // (zero-based index of this character in the encoding vector)
    $code = chr(count($this->_encodings[$vector_name]));

    // Add new character to the custom encoding vector
    $this->_encodings[$vector_name][$code] = $char;

    // Add new character to the UTF8 mapping table.
    // NOTE(review): if this runs before get_mapping() has loaded the
    // cached table, $_utf8_mapping becomes set here and get_mapping()
    // will never load the standard mappings - confirm that callers
    // always call get_mapping() first.
    $this->_utf8_mapping[code_to_utf8($char)][$vector_name] = $code;

    return $code;
  }

  /**
   * Rebuild the UTF-8 mapping table from the global $g_utf8_converters
   * and store a serialized copy in $mapping_file.
   *
   * If the file cannot be opened for writing the failure is logged and
   * the freshly built in-memory table is still kept.
   *
   * @param string $mapping_file Path of the cache file to write
   */
  function generate_mapping($mapping_file) {
    global $g_utf8_converters;

    $this->_utf8_mapping = array();
    foreach (array_keys($g_utf8_converters) as $encoding) {
      // Element 0 of a converter is its code => character vector;
      // flipping it yields character => code
      $flipped = array_flip($g_utf8_converters[$encoding][0]);
      foreach ($flipped as $utf => $code) {
        $this->_utf8_mapping[code_to_utf8($utf)][$encoding] = $code;
      }
    }

    $file = fopen($mapping_file, 'w');
    if ($file === false) {
      // Do not pass an invalid handle to fwrite/fclose
      error_log(sprintf("Cannot write encoding mapping file '%s'", $mapping_file));
      return;
    }

    fwrite($file, serialize($this->_utf8_mapping));
    fclose($file);
  }

  /**
   * Return the shared manager instance stored in the global
   * $g_manager_encodings.
   *
   * Declared static because it never touches $this; PHP 8 throws an
   * Error when a non-static method is called statically. Calling it on
   * an instance keeps working.
   *
   * @return ManagerEncoding reference to the global instance
   */
  static function &get() {
    global $g_manager_encodings;
    return $g_manager_encodings;
  }

  /**
   * Resolve an encoding alias using the global $g_encoding_aliases.
   *
   * @param string $encoding Encoding name or alias
   *
   * @return string mapped name if an alias entry exists, otherwise
   * $encoding unchanged
   */
  function get_canonized_encoding_name($encoding) {
    global $g_encoding_aliases;

    if (isset($g_encoding_aliases[$encoding])) {
      return $g_encoding_aliases[$encoding];
    }

    return $encoding;
  }

  /**
   * @return string name of the custom encoding vector currently being
   * filled
   */
  function get_current_custom_encoding_name() {
    return $this->get_custom_encoding_name($this->get_custom_vector_index());
  }

  /**
   * @param int $index Custom vector number
   *
   * @return string 'custom' followed by the number
   */
  function get_custom_encoding_name($index) {
    return sprintf('custom%d', $index);
  }

  /**
   * @return int number of the current custom encoding vector
   */
  function get_custom_vector_index() {
    return $this->_custom_vector_index;
  }

  /**
   * Get glyph names for every code of the given encoding.
   *
   * @param string $encoding Encoding name
   *
   * @return Array code => glyph name; null (after logging an error) if
   * the encoding is not known
   */
  function get_encoding_glyphs($encoding) {
    $vector = $this->get_encoding_vector($encoding);
    if (is_null($vector)) {
      error_log(sprintf("Cannot get encoding vector for encoding '%s'", $encoding));
      return null;
    }
    return $this->vector_to_glyphs($vector);
  }

  /**
   * Get  an encoding  vector  (array containing  256 elements;  every
   * element is an ucs-2 encoded character)
   *
   * Predefined vectors in $g_utf8_converters take precedence over
   * vectors registered on this object. Codes missing from the vector
   * are filled with 0xFFFF.
   *
   * @param $encoding Encoding name
   *
   * @return Array encoding vector; null if this encoding is not known to the script
   */
  function get_encoding_vector($encoding) {
    $encoding = $this->get_canonized_encoding_name($encoding);

    global $g_utf8_converters;
    if (isset($g_utf8_converters[$encoding])) {
      $vector = $g_utf8_converters[$encoding][0];
    } elseif (isset($this->_encodings[$encoding])) {
      $vector = $this->_encodings[$encoding];
    } else {
      return null;
    }

    for ($i = 0; $i <= 255; $i++) {
      if (!isset($vector[chr($i)])) {
        $vector[chr($i)] = 0xFFFF;
      }
    }

    return $vector;
  }

  /**
   * Build a glyph name => list of codes table for an encoding, using
   * the global g_unicode_glyphs table. Characters without an entry in
   * that table are skipped.
   *
   * @param string $encoding Encoding name
   *
   * @return Array glyph name => array of codes; empty array if the
   * encoding is not known
   */
  function get_glyph_to_code_mapping($encoding) {
    $result = array();

    $vector = $this->get_encoding_vector($encoding);
    if (is_null($vector)) {
      // Unknown encoding: nothing to iterate over
      return $result;
    }

    foreach ($vector as $code => $uccode) {
      if (isset($GLOBALS['g_unicode_glyphs'][$uccode])) {
        $result[$GLOBALS['g_unicode_glyphs'][$uccode]][] = $code;
      }
    }

    return $result;
  }

  /**
   * Look up the codes of a UTF-8 character in all encodings. The
   * mapping table is loaded from CACHE_DIR.'utf8.mappings.dat' on
   * first use.
   *
   * @param string $char UTF-8 character
   *
   * @return Array encoding name => code; null if the character is not
   * in the table
   */
  function get_mapping($char) {
    if (!isset($this->_utf8_mapping)) {
      $this->load_mapping(CACHE_DIR . 'utf8.mappings.dat');
    }

    if (!isset($this->_utf8_mapping[$char])) {
      return null;
    }
    return $this->_utf8_mapping[$char];
  }

  /**
   * Extract the UTF-8 character starting at byte offset $ptr and move
   * $ptr past it. The sequence length (1 to 4 bytes) is derived from
   * the high bits of the first byte only; the following bytes are not
   * validated.
   *
   * @param string $raw_content UTF-8 encoded string
   * @param int $ptr Byte offset of the character; advanced by the
   * detected length
   *
   * @return string bytes of the extracted character
   */
  function get_next_utf8_char($raw_content, &$ptr) {
    $lead = ord($raw_content[$ptr]);

    // Order matters: the wider masks must be tested first
    if (($lead & 0xF0) == 0xF0) {
      $charlen = 4;
    } elseif (($lead & 0xE0) == 0xE0) {
      $charlen = 3;
    } elseif (($lead & 0xC0) == 0xC0) {
      $charlen = 2;
    } else {
      $charlen = 1;
    }

    $char = substr($raw_content, $ptr, $charlen);
    $ptr += $charlen;

    return $char;
  }

  /**
   * Render an encoding as a PostScript array definition of 256 glyph
   * names; codes without a known glyph name become /.notdef.
   *
   * @param string $encoding Encoding name (also used as the name of
   * the PostScript array)
   *
   * @return string PostScript source text
   */
  function get_ps_encoding_vector($encoding) {
    $vector = $this->get_encoding_vector($encoding);

    $result = "/".$encoding." [ \n";
    for ($i = 0; $i < 256; $i++) {
      // Start a new output line before every tenth glyph name
      if ($i % 10 == 0) {
        $result .= "\n";
      }

      // ! Note the order of array checking; optimizing interpreters may break this
      if (isset($vector[chr($i)]) && isset($GLOBALS['g_unicode_glyphs'][$vector[chr($i)]])) {
        $result .= " /".$GLOBALS['g_unicode_glyphs'][$vector[chr($i)]];
      } else {
        $result .= " /.notdef";
      }
    }
    $result .= " ] readonly def";

    return $result;
  }

  /**
   * @param string $encoding Encoding name
   *
   * @return int result of preg_match: 1 if the name looks like
   * 'custom<digits>', 0 otherwise
   */
  function is_custom_encoding($encoding) {
    return preg_match('/^custom\d+$/', $encoding);
  }

  /**
   * @return bool true when the current custom vector already holds
   * 256 entries
   */
  function is_custom_encoding_full() {
    return count($this->_encodings[$this->get_current_custom_encoding_name()]) >= 256;
  }

  /**
   * Load the UTF-8 mapping table from a cache file. The table is
   * regenerated (and the file rewritten) when the file is not readable
   * or does not contain a serialized array.
   *
   * @param string $mapping_file Path of the cache file
   */
  function load_mapping($mapping_file) {
    if (is_readable($mapping_file)) {
      $mapping = unserialize(file_get_contents($mapping_file));
      if (is_array($mapping)) {
        $this->_utf8_mapping = $mapping;
        return;
      }
      // Damaged cache: storing a non-array here would make every
      // later lookup fail, so rebuild the table instead
    }

    $this->generate_mapping($mapping_file);
  }

  /**
   * Create new custom  256-characters encoding vector.  Codes 0 to 32
   * inclusive (33 entries, the last one being the space character)
   * are reserved: each is mapped to the character with the same code.
   *
   * Custom encoding vectors have names 'customX' when X stand for the
   * encoding index.
   */
  function new_custom_encoding_vector() {
    $initial_vector = array();
    for ($i = 0; $i <= 32; $i++) {
      $initial_vector[chr($i)] = chr($i);
    }

    $this->register_encoding(sprintf('custom%d', $this->next_custom_vector_index()),
                             $initial_vector);
  }

  /**
   * Returns index for the next custom encoding (the stored index is
   * incremented as a side effect)
   */
  function next_custom_vector_index() {
    return ++$this->_custom_vector_index;
  }

  /**
   * Store an encoding vector under the given name, replacing any
   * vector previously registered with that name.
   *
   * @param string $name Encoding name
   * @param Array $vector code => character
   */
  function register_encoding($name, $vector) {
    $this->_encodings[$name] = $vector;
  }

  /**
   * Convert a string in a single-byte encoding to UTF-8 by passing the
   * vector entry of every byte through code_to_utf8().
   *
   * No check is made here for an unknown encoding.
   *
   * @param string $word Text in the source encoding
   * @param string $encoding Encoding name
   *
   * @return string converted text
   */
  function to_utf8($word, $encoding) {
    $vector = $this->get_encoding_vector($encoding);

    $converted = '';
    for ($i = 0, $size = strlen($word); $i < $size; $i++) {
      // Square brackets: the $word{$i} form is a parse error in PHP 8
      $converted .= code_to_utf8($vector[$word[$i]]);
    }

    return $converted;
  }

  /**
   * Translate an encoding vector into glyph names.
   *
   * @param Array $vector code => character
   *
   * @return Array code => glyph name: the name from the global
   * g_unicode_glyphs table when present, ".notdef" for 0xFFFF,
   * otherwise a generated "uXXXX" name
   */
  function vector_to_glyphs($vector) {
    $result = array();

    foreach ($vector as $code => $ucs2) {
      if (isset($GLOBALS['g_unicode_glyphs'][$ucs2])) {
        $result[$code] = $GLOBALS['g_unicode_glyphs'][$ucs2];
      } elseif ($ucs2 == 0xFFFF) {
        $result[$code] = ".notdef";
      } else {
        // Use "Unicode and Glyph Names" mapping from Adobe
        // http://partners.adobe.com/public/developer/opentype/index_glyph.html
        $result[$code] = sprintf("u%04X", $ucs2);
      }
    }

    return $result;
  }
}
294
// Shared ManagerEncoding instance. The explicit `global` declaration
// makes the assignment target the global variable even when this file
// is included from inside a function.
global $g_manager_encodings;
$g_manager_encodings = new ManagerEncoding();
297?>