1<?php
2
3namespace MaxMind\Db\Reader;
4
5// @codingStandardsIgnoreLine
6use RuntimeException;
7
/**
 * @ignore
 *
 * Threshold, in bytes, that the decoder compares byte lengths against to
 * decide whether a value can be handled with native PHP integers.
 *
 * It is derived from the base-2 logarithm of PHP_INT_MAX; one is subtracted
 * from that logarithm before dividing by eight to guard against precision
 * loss.
 */
\define(
    __NAMESPACE__ . '\_MM_MAX_INT_BYTES',
    (log(PHP_INT_MAX, 2) - 1) / 8
);
14
/**
 * Decodes fields from the data section of a MaxMind DB file.
 *
 * A field starts with a control byte whose top three bits carry the type and
 * whose low five bits carry the size (or, for pointers, the pointer layout).
 * decode() reads one field at a given offset and returns the decoded PHP
 * value together with the offset immediately after that field.
 */
class Decoder
{
    // Handle passed unchanged to Util::read() for every read.
    private $fileStream;
    // Amount added to every decoded pointer value.
    private $pointerBase;
    // log2($pointerBase) / 8, or 0 when the base is not positive. Used by
    // decodePointer() to estimate whether a 4-byte pointer plus the base
    // still fits in a native integer.
    private $pointerBaseByteSize;
    // This is only used for unit testing
    private $pointerTestHack;
    // True when the platform is little endian, in which case multi-byte
    // values are reversed before being handed to unpack().
    private $switchByteOrder;

    /** @ignore */
    const _EXTENDED = 0;
    /** @ignore */
    const _POINTER = 1;
    /** @ignore */
    const _UTF8_STRING = 2;
    /** @ignore */
    const _DOUBLE = 3;
    /** @ignore */
    const _BYTES = 4;
    /** @ignore */
    const _UINT16 = 5;
    /** @ignore */
    const _UINT32 = 6;
    /** @ignore */
    const _MAP = 7;
    /** @ignore */
    const _INT32 = 8;
    /** @ignore */
    const _UINT64 = 9;
    /** @ignore */
    const _UINT128 = 10;
    /** @ignore */
    const _ARRAY = 11;
    /** @ignore */
    const _CONTAINER = 12;
    /** @ignore */
    const _END_MARKER = 13;
    /** @ignore */
    const _BOOLEAN = 14;
    /** @ignore */
    const _FLOAT = 15;

    /**
     * @param mixed $fileStream      handle handed to Util::read() for all reads
     * @param int   $pointerBase     amount added to every decoded pointer value
     * @param bool  $pointerTestHack when true, decode() returns a pointer's
     *                               target instead of following it (unit
     *                               testing only)
     */
    public function __construct(
        $fileStream,
        $pointerBase = 0,
        $pointerTestHack = false
    ) {
        $this->fileStream = $fileStream;
        $this->pointerBase = $pointerBase;

        $this->pointerBaseByteSize = $pointerBase > 0 ? log($pointerBase, 2) / 8 : 0;
        $this->pointerTestHack = $pointerTestHack;

        $this->switchByteOrder = $this->isPlatformLittleEndian();
    }

    /**
     * Decodes the field whose control byte is located at $offset.
     *
     * @param int $offset position of the field's control byte
     *
     * @throws InvalidDatabaseException if an extended type resolves to a type
     *                                  number below 8 or the type is unknown
     *
     * @return array [decoded value, offset just past the field]; when the
     *               pointer test hack is enabled and the field is a pointer,
     *               the single-element array [pointer] instead
     */
    public function decode($offset)
    {
        list(, $ctrlByte) = unpack(
            'C',
            Util::read($this->fileStream, $offset, 1)
        );
        ++$offset;

        // The type lives in the top three bits of the control byte.
        $type = $ctrlByte >> 5;

        // Pointers are a special case, we don't read the next $size bytes, we
        // use the size to determine the length of the pointer and then follow
        // it.
        if ($type === self::_POINTER) {
            list($pointer, $offset) = $this->decodePointer($ctrlByte, $offset);

            // for unit testing
            if ($this->pointerTestHack) {
                return [$pointer];
            }

            // The offset returned by the nested decode is discarded: decoding
            // resumes right after the pointer itself, not after its target.
            list($result) = $this->decode($pointer);

            return [$result, $offset];
        }

        if ($type === self::_EXTENDED) {
            // For extended types the real type number is the next byte plus 7.
            list(, $nextByte) = unpack(
                'C',
                Util::read($this->fileStream, $offset, 1)
            );

            $type = $nextByte + 7;

            if ($type < 8) {
                throw new InvalidDatabaseException(
                    'Something went horribly wrong in the decoder. An extended type '
                    . 'resolved to a type number < 8 ('
                    . $type
                    . ')'
                );
            }

            ++$offset;
        }

        list($size, $offset) = $this->sizeFromCtrlByte($ctrlByte, $offset);

        return $this->decodeByType($type, $offset, $size);
    }

    /**
     * Decodes the payload of a field once its type and size are known.
     *
     * @param int $type   one of the type constants of this class
     * @param int $offset position of the first payload byte
     * @param int $size   size taken from the control byte(s); an entry count
     *                    for maps and arrays, the value itself for booleans,
     *                    a byte count otherwise
     *
     * @throws InvalidDatabaseException on an unhandled type or a payload size
     *                                  that is invalid for the type
     *
     * @return array [decoded value, offset just past the field]
     */
    private function decodeByType($type, $offset, $size)
    {
        // These types do not consume $size payload bytes directly.
        switch ($type) {
            case self::_MAP:
                return $this->decodeMap($size, $offset);
            case self::_ARRAY:
                return $this->decodeArray($size, $offset);
            case self::_BOOLEAN:
                return [$this->decodeBoolean($size), $offset];
        }

        $newOffset = $offset + $size;
        $bytes = Util::read($this->fileStream, $offset, $size);
        switch ($type) {
            case self::_BYTES:
            case self::_UTF8_STRING:
                return [$bytes, $newOffset];
            case self::_DOUBLE:
                $this->verifySize(8, $size);

                return [$this->decodeDouble($bytes), $newOffset];
            case self::_FLOAT:
                $this->verifySize(4, $size);

                return [$this->decodeFloat($bytes), $newOffset];
            case self::_INT32:
                return [$this->decodeInt32($bytes, $size), $newOffset];
            case self::_UINT16:
            case self::_UINT32:
            case self::_UINT64:
            case self::_UINT128:
                return [$this->decodeUint($bytes, $size), $newOffset];
            default:
                throw new InvalidDatabaseException(
                    'Unknown or unexpected type: ' . $type
                );
        }
    }

    /**
     * Ensures a fixed-width field has exactly the expected payload size.
     *
     * @param int $expected required payload size in bytes
     * @param int $actual   payload size found in the file
     *
     * @throws InvalidDatabaseException if the two sizes differ
     */
    private function verifySize($expected, $actual)
    {
        if ($expected !== $actual) {
            throw new InvalidDatabaseException(
                "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
            );
        }
    }

    /**
     * Decodes $size consecutive fields into a list.
     *
     * @param int $size   number of elements
     * @param int $offset position of the first element
     *
     * @return array [list of decoded values, offset just past the last element]
     */
    private function decodeArray($size, $offset)
    {
        $array = [];

        for ($i = 0; $i < $size; ++$i) {
            list($value, $offset) = $this->decode($offset);
            $array[] = $value;
        }

        return [$array, $offset];
    }

    /**
     * Booleans carry their value in the size bits: zero is false, anything
     * else is true.
     *
     * @param int $size size value taken from the control byte
     *
     * @return bool
     */
    private function decodeBoolean($size)
    {
        return $size !== 0;
    }

    /**
     * Decodes eight bytes into a PHP float.
     *
     * @param string $bits the eight payload bytes as stored in the file
     *
     * @return float
     */
    private function decodeDouble($bits)
    {
        // This assumes IEEE 754 doubles, but most (all?) modern platforms
        // use them.
        //
        // We are not using the "E" format as that was only added in
        // 7.0.15 and 7.1.1. As such, we must switch byte order on
        // little endian machines.
        list(, $double) = unpack('d', $this->maybeSwitchByteOrder($bits));

        return $double;
    }

    /**
     * Decodes four bytes into a PHP float.
     *
     * @param string $bits the four payload bytes as stored in the file
     *
     * @return float
     */
    private function decodeFloat($bits)
    {
        // This assumes IEEE 754 floats, but most (all?) modern platforms
        // use them.
        //
        // We are not using the "G" format as that was only added in
        // 7.0.15 and 7.1.1. As such, we must switch byte order on
        // little endian machines.
        list(, $float) = unpack('f', $this->maybeSwitchByteOrder($bits));

        return $float;
    }

    /**
     * Decodes a signed 32-bit integer stored in zero to four bytes.
     *
     * @param string $bytes payload bytes as stored in the file
     * @param int    $size  number of payload bytes
     *
     * @throws InvalidDatabaseException if $size is greater than four
     *
     * @return int
     */
    private function decodeInt32($bytes, $size)
    {
        switch ($size) {
            case 0:
                return 0;
            case 1:
            case 2:
            case 3:
                // Left-pad with zero bytes to the four bytes unpack('l') needs.
                $bytes = str_pad($bytes, 4, "\x00", STR_PAD_LEFT);
                break;
            case 4:
                break;
            default:
                throw new InvalidDatabaseException(
                    "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
                );
        }

        list(, $int) = unpack('l', $this->maybeSwitchByteOrder($bytes));

        return $int;
    }

    /**
     * Decodes $size key/value pairs into an associative array.
     *
     * @param int $size   number of key/value pairs
     * @param int $offset position of the first key
     *
     * @return array [associative array, offset just past the last value]
     */
    private function decodeMap($size, $offset)
    {
        $map = [];

        for ($i = 0; $i < $size; ++$i) {
            list($key, $offset) = $this->decode($offset);
            list($value, $offset) = $this->decode($offset);
            $map[$key] = $value;
        }

        return [$map, $offset];
    }

    /**
     * Decodes a pointer field into the offset it refers to.
     *
     * Bits 3-4 of the control byte give the number of bytes that follow
     * (1-4). For one to three bytes, the low three bits of the control byte
     * supply the most significant bits of the value; for four bytes only the
     * following bytes are used. The pointer base is added in every case.
     *
     * @param int $ctrlByte control byte of the pointer field
     * @param int $offset   position of the first byte after the control byte
     *
     * @throws RuntimeException if the pointer does not fit in a native integer
     *                          and neither gmp nor bcmath is available
     *
     * @return array [pointer, offset just past the pointer field]; the pointer
     *               is a numeric string when gmp or bcmath had to be used
     */
    private function decodePointer($ctrlByte, $offset)
    {
        $pointerSize = (($ctrlByte >> 3) & 0x3) + 1;

        $buffer = Util::read($this->fileStream, $offset, $pointerSize);
        $offset = $offset + $pointerSize;

        switch ($pointerSize) {
            case 1:
                $packed = (pack('C', $ctrlByte & 0x7)) . $buffer;
                list(, $pointer) = unpack('n', $packed);
                $pointer += $this->pointerBase;
                break;
            case 2:
                $packed = "\x00" . (pack('C', $ctrlByte & 0x7)) . $buffer;
                list(, $pointer) = unpack('N', $packed);
                $pointer += $this->pointerBase + 2048;
                break;
            case 3:
                $packed = (pack('C', $ctrlByte & 0x7)) . $buffer;

                // It is safe to use 'N' here, even on 32 bit machines as the
                // first bit is 0.
                list(, $pointer) = unpack('N', $packed);
                $pointer += $this->pointerBase + 526336;
                break;
            case 4:
                // We cannot use unpack here as we might overflow on 32 bit
                // machines
                $pointerOffset = $this->decodeUint($buffer, $pointerSize);

                $byteLength = $pointerSize + $this->pointerBaseByteSize;

                if ($byteLength <= _MM_MAX_INT_BYTES) {
                    $pointer = $pointerOffset + $this->pointerBase;
                } elseif (\extension_loaded('gmp')) {
                    $pointer = gmp_strval(gmp_add($pointerOffset, $this->pointerBase));
                } elseif (\extension_loaded('bcmath')) {
                    $pointer = bcadd($pointerOffset, $this->pointerBase);
                } else {
                    throw new RuntimeException(
                        'The gmp or bcmath extension must be installed to read this database.'
                    );
                }
        }

        return [$pointer, $offset];
    }

    /**
     * Decodes a big-endian unsigned integer of arbitrary byte length.
     *
     * @param string $bytes      payload bytes, most significant byte first
     * @param int    $byteLength number of bytes to consume from $bytes
     *
     * @throws RuntimeException if the byte length exceeds _MM_MAX_INT_BYTES
     *                          and neither gmp nor bcmath is available
     *
     * @return int|string a native integer when the byte length is at most
     *                    _MM_MAX_INT_BYTES, otherwise a decimal string
     */
    private function decodeUint($bytes, $byteLength)
    {
        if ($byteLength === 0) {
            return 0;
        }

        // We only use gmp or bcmath if the final value is too big. The
        // decision depends on the byte length alone, so make it once.
        $fitsInNativeInt = $byteLength <= _MM_MAX_INT_BYTES;

        $integer = 0;

        for ($i = 0; $i < $byteLength; ++$i) {
            $part = \ord($bytes[$i]);

            if ($fitsInNativeInt) {
                $integer = ($integer << 8) + $part;
            } elseif (\extension_loaded('gmp')) {
                $integer = gmp_strval(gmp_add(gmp_mul($integer, 256), $part));
            } elseif (\extension_loaded('bcmath')) {
                $integer = bcadd(bcmul($integer, 256), $part);
            } else {
                throw new RuntimeException(
                    'The gmp or bcmath extension must be installed to read this database.'
                );
            }
        }

        return $integer;
    }

    /**
     * Resolves the size of a field from its control byte and, when needed,
     * the one to three size bytes that follow.
     *
     * The low five bits of the control byte are the size when below 29.
     * Values 29, 30 and 31 mean that 1, 2 or 3 extra bytes follow and hold an
     * unsigned big-endian integer that is added to 29, 285 or 65821
     * respectively.
     *
     * @param int $ctrlByte control byte of the field
     * @param int $offset   position of the first byte after the type byte(s)
     *
     * @return array [size, offset just past any extra size bytes]
     */
    private function sizeFromCtrlByte($ctrlByte, $offset)
    {
        $size = $ctrlByte & 0x1f;

        if ($size < 29) {
            return [$size, $offset];
        }

        $bytesToRead = $size - 28;
        $bytes = Util::read($this->fileStream, $offset, $bytesToRead);

        if ($size === 29) {
            $size = 29 + \ord($bytes);
        } elseif ($size === 30) {
            list(, $adjust) = unpack('n', $bytes);
            $size = 285 + $adjust;
        } else {
            // $size is 31 here: three size bytes follow. All 24 bits are
            // significant, so the value must not be masked; it is only
            // left-padded with a zero byte so that unpack('N') can read it.
            list(, $adjust) = unpack('N', "\x00" . $bytes);
            $size = 65821 + $adjust;
        }

        return [$size, $offset + $bytesToRead];
    }

    /**
     * Reverses $bytes on little-endian platforms so that machine-order
     * unpack() formats can read the file's byte order.
     *
     * @param string $bytes bytes as stored in the file
     *
     * @return string
     */
    private function maybeSwitchByteOrder($bytes)
    {
        return $this->switchByteOrder ? strrev($bytes) : $bytes;
    }

    /**
     * Detects a little-endian platform by packing a 16-bit value in machine
     * byte order and reading it back as little endian.
     *
     * @return bool
     */
    private function isPlatformLittleEndian()
    {
        $testint = 0x00FF;
        $packed = pack('S', $testint);

        return $testint === current(unpack('v', $packed));
    }
}
364