1<?php
2// $Header: /cvsroot/html2ps/encoding.inc.php,v 1.7 2006/06/25 13:55:36 Konstantin Exp $
3
/**
 * Encodes the character whose code is written as a hexadecimal string
 * into its UTF-8 byte sequence.
 *
 * The string is parsed with hexdec() and the resulting number is handed
 * to code_to_utf8(), which performs the actual encoding.
 *
 * @param string $code Hexadecimal character code (e.g. "20AC")
 * @return string UTF-8 encoded character
 */
function hex_to_utf8($code) {
  $numeric_code = hexdec($code);

  return code_to_utf8($numeric_code);
}
11
/**
 * Converts an UTF8-encoded character to its integer character code.
 *
 * Only the first character of the string is decoded; the length of the
 * sequence (1 to 4 bytes) is derived from the high bits of the lead byte.
 *
 * Sequences of incorrect length: when the string is shorter than the lead
 * byte announces, the missing continuation bytes are treated as zero, so
 * the function never reads past the end of the string. An empty string
 * yields 0. A lone continuation byte (0x80..0xBF) is returned as its own
 * byte value, as a single-byte sequence would be.
 *
 * @param string $utf8 String starting with an UTF-8 encoded character
 * @return int Character code
 */
function utf8_to_code($utf8) {
  $utf8   = (string)$utf8;
  $length = strlen($utf8);

  // Fetch up to four byte values with a bounds check. Square-bracket offsets
  // are used because the curly-brace form ($str{0}) no longer parses in PHP 8.
  $bytes = array(0, 0, 0, 0);
  for ($i = 0; $i < 4 && $i < $length; $i++) {
    $bytes[$i] = ord($utf8[$i]);
  };

  $lead = $bytes[0];

  if (($lead & 0xF0) === 0xF0) {
    // 4-byte sequence: 3 payload bits in the lead byte, 6 in each continuation
    return
      (($lead     & 0x07) << 18) |
      (($bytes[1] & 0x3F) << 12) |
      (($bytes[2] & 0x3F) <<  6) |
      ($bytes[3]  & 0x3F);
  };

  if (($lead & 0xE0) === 0xE0) {
    // 3-byte sequence: 4 payload bits in the lead byte
    return
      (($lead     & 0x0F) << 12) |
      (($bytes[1] & 0x3F) <<  6) |
      ($bytes[2]  & 0x3F);
  };

  if (($lead & 0xC0) === 0xC0) {
    // 2-byte sequence: 5 payload bits in the lead byte
    return
      (($lead    & 0x1F) << 6) |
      ($bytes[1] & 0x3F);
  };

  // Single-byte sequence (also covers the empty string, where $lead is 0)
  return $lead;
}
45
/**
 * Encodes an integer character code as an UTF-8 byte sequence.
 *
 * The length of the result depends on the magnitude of the code:
 *   below 0x80    - 1 byte
 *   below 0x800   - 2 bytes
 *   below 0x10000 - 3 bytes
 *   otherwise     - 4 bytes (the low 21 bits of the code are used)
 *
 * @param int $code Character code
 * @return string UTF-8 encoded character
 */
function code_to_utf8($code) {
  if ($code < 0x80) {
    // Plain ASCII is stored as is
    return chr($code);
  }

  // Every multi-byte form ends with a continuation byte holding the low 6 bits
  $last = chr(0x80 | ($code & 0x3F));

  if ($code < 0x800) {
    $lead = chr(0xC0 | (($code >> 6) & 0x1F));
    return $lead . $last;
  }

  // 3- and 4-byte forms share the continuation byte carrying bits 6..11
  $next_to_last = chr(0x80 | (($code >> 6) & 0x3F));

  if ($code < 0x10000) {
    $lead = chr(0xE0 | (($code >> 12) & 0x0F));
    return $lead . $next_to_last . $last;
  }

  $lead   = chr(0xF0 | (($code >> 18) & 0x07));
  $second = chr(0x80 | (($code >> 12) & 0x3F));

  return $lead . $second . $next_to_last . $last;
}
68
69?>