<?php

namespace MaxMind\Db\Reader;

// @codingStandardsIgnoreLine
use RuntimeException;

/**
 * @ignore
 *
 * Largest byte length that is decoded with native integer arithmetic.
 * We subtract 1 from the log to protect against precision loss.
 */
\define(__NAMESPACE__ . '\_MM_MAX_INT_BYTES', (log(PHP_INT_MAX, 2) - 1) / 8);

/**
 * Decodes typed values from the data section of a MaxMind DB stream.
 *
 * Every field starts with a control byte: the upper three bits carry the type
 * and the lower five bits carry the payload size (or, for pointers, the
 * pointer size and the high bits of the pointer value).
 */
class Decoder
{
    private $fileStream;
    private $pointerBase;
    private $pointerBaseByteSize;
    // This is only used for unit testing
    private $pointerTestHack;
    private $switchByteOrder;

    /** @ignore */
    const _EXTENDED = 0;
    /** @ignore */
    const _POINTER = 1;
    /** @ignore */
    const _UTF8_STRING = 2;
    /** @ignore */
    const _DOUBLE = 3;
    /** @ignore */
    const _BYTES = 4;
    /** @ignore */
    const _UINT16 = 5;
    /** @ignore */
    const _UINT32 = 6;
    /** @ignore */
    const _MAP = 7;
    /** @ignore */
    const _INT32 = 8;
    /** @ignore */
    const _UINT64 = 9;
    /** @ignore */
    const _UINT128 = 10;
    /** @ignore */
    const _ARRAY = 11;
    /** @ignore */
    const _CONTAINER = 12;
    /** @ignore */
    const _END_MARKER = 13;
    /** @ignore */
    const _BOOLEAN = 14;
    /** @ignore */
    const _FLOAT = 15;

    /**
     * @param mixed $fileStream      handle passed through to Util::read() for every read
     * @param int   $pointerBase     value added to every decoded pointer
     * @param bool  $pointerTestHack when true, decode() returns the raw pointer
     *                               instead of following it (unit tests only)
     */
    public function __construct(
        $fileStream,
        $pointerBase = 0,
        $pointerTestHack = false
    ) {
        $this->fileStream = $fileStream;
        $this->pointerBase = $pointerBase;

        // Approximate number of bytes needed to represent the pointer base;
        // used to decide whether pointer arithmetic fits in a native integer.
        $this->pointerBaseByteSize = $pointerBase > 0 ? log($pointerBase, 2) / 8 : 0;
        $this->pointerTestHack = $pointerTestHack;

        $this->switchByteOrder = $this->isPlatformLittleEndian();
    }

    /**
     * Decodes the field that starts at the given offset.
     *
     * @param int $offset position of the field's control byte
     *
     * @throws InvalidDatabaseException if the field is malformed
     * @throws RuntimeException         if big-integer support is needed but
     *                                  neither gmp nor bcmath is loaded
     *
     * @return array [decoded value, offset just past the field]; with the
     *               pointer test hack enabled and a pointer field, [pointer]
     */
    public function decode($offset)
    {
        list(, $ctrlByte) = unpack(
            'C',
            Util::read($this->fileStream, $offset, 1)
        );
        ++$offset;

        $type = $ctrlByte >> 5;

        // Pointers are a special case, we don't read the next $size bytes, we
        // use the size to determine the length of the pointer and then follow
        // it.
        if ($type === self::_POINTER) {
            list($pointer, $offset) = $this->decodePointer($ctrlByte, $offset);

            // for unit testing
            if ($this->pointerTestHack) {
                return [$pointer];
            }

            // Only the pointed-to value is used; the returned offset stays
            // just past the pointer itself.
            list($result) = $this->decode($pointer);

            return [$result, $offset];
        }

        if ($type === self::_EXTENDED) {
            // Extended types store (type number - 7) in the following byte.
            list(, $nextByte) = unpack(
                'C',
                Util::read($this->fileStream, $offset, 1)
            );

            $type = $nextByte + 7;

            if ($type < 8) {
                throw new InvalidDatabaseException(
                    'Something went horribly wrong in the decoder. An extended type '
                    . 'resolved to a type number < 8 ('
                    . $type
                    . ')'
                );
            }

            ++$offset;
        }

        list($size, $offset) = $this->sizeFromCtrlByte($ctrlByte, $offset);

        return $this->decodeByType($type, $offset, $size);
    }

    /**
     * Dispatches to the decoder for a resolved type.
     *
     * @param int $type   one of the type constants of this class
     * @param int $offset offset of the first payload byte
     * @param int $size   payload size (element count for maps and arrays)
     *
     * @throws InvalidDatabaseException for an unknown type or a bad payload size
     *
     * @return array [decoded value, offset just past the field]
     */
    private function decodeByType($type, $offset, $size)
    {
        // These types do not consume $size payload bytes themselves: for maps
        // and arrays $size is an element count, for booleans it is the value.
        switch ($type) {
            case self::_MAP:
                return $this->decodeMap($size, $offset);
            case self::_ARRAY:
                return $this->decodeArray($size, $offset);
            case self::_BOOLEAN:
                return [$this->decodeBoolean($size), $offset];
        }

        $newOffset = $offset + $size;
        $bytes = Util::read($this->fileStream, $offset, $size);
        switch ($type) {
            case self::_BYTES:
            case self::_UTF8_STRING:
                return [$bytes, $newOffset];
            case self::_DOUBLE:
                $this->verifySize(8, $size);

                return [$this->decodeDouble($bytes), $newOffset];
            case self::_FLOAT:
                $this->verifySize(4, $size);

                return [$this->decodeFloat($bytes), $newOffset];
            case self::_INT32:
                return [$this->decodeInt32($bytes, $size), $newOffset];
            case self::_UINT16:
            case self::_UINT32:
            case self::_UINT64:
            case self::_UINT128:
                return [$this->decodeUint($bytes, $size), $newOffset];
            default:
                throw new InvalidDatabaseException(
                    'Unknown or unexpected type: ' . $type
                );
        }
    }

    /**
     * Ensures a fixed-width field has exactly the expected payload size.
     *
     * @param int $expected required size in bytes
     * @param int $actual   size read from the control byte
     *
     * @throws InvalidDatabaseException when the sizes differ
     */
    private function verifySize($expected, $actual)
    {
        if ($expected !== $actual) {
            throw new InvalidDatabaseException(
                "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
            );
        }
    }

    /**
     * Decodes $size consecutive fields into a list.
     *
     * @param int $size   number of elements
     * @param int $offset offset of the first element
     *
     * @return array [list of decoded values, offset just past the last element]
     */
    private function decodeArray($size, $offset)
    {
        $array = [];

        for ($i = 0; $i < $size; ++$i) {
            list($value, $offset) = $this->decode($offset);
            $array[] = $value;
        }

        return [$array, $offset];
    }

    /**
     * A boolean has no payload; its value is stored in the size field.
     *
     * @param int $size size field of the control byte
     *
     * @return bool false when the size is 0, true otherwise
     */
    private function decodeBoolean($size)
    {
        return $size !== 0;
    }

    /**
     * Unpacks 8 bytes as a double.
     *
     * @param string $bits the 8 payload bytes as stored in the file
     *
     * @return float
     */
    private function decodeDouble($bits)
    {
        // This assumes IEEE 754 doubles, but most (all?) modern platforms
        // use them.
        //
        // We are not using the "E" format as that was only added in
        // 7.0.15 and 7.1.1. As such, we must switch byte order on
        // little endian machines.
        list(, $double) = unpack('d', $this->maybeSwitchByteOrder($bits));

        return $double;
    }

    /**
     * Unpacks 4 bytes as a float.
     *
     * @param string $bits the 4 payload bytes as stored in the file
     *
     * @return float
     */
    private function decodeFloat($bits)
    {
        // This assumes IEEE 754 floats, but most (all?) modern platforms
        // use them.
        //
        // We are not using the "G" format as that was only added in
        // 7.0.15 and 7.1.1. As such, we must switch byte order on
        // little endian machines.
        list(, $float) = unpack('f', $this->maybeSwitchByteOrder($bits));

        return $float;
    }

    /**
     * Decodes a signed 32-bit integer stored in 0 to 4 bytes.
     *
     * @param string $bytes payload bytes
     * @param int    $size  number of payload bytes
     *
     * @throws InvalidDatabaseException when $size is greater than 4
     *
     * @return int
     */
    private function decodeInt32($bytes, $size)
    {
        switch ($size) {
            case 0:
                return 0;
            case 1:
            case 2:
            case 3:
                // Left-pad with zero bytes so that unpack() gets 4 bytes.
                $bytes = str_pad($bytes, 4, "\x00", STR_PAD_LEFT);
                break;
            case 4:
                break;
            default:
                throw new InvalidDatabaseException(
                    "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
                );
        }

        // 'l' reads in machine byte order, hence the conditional reversal.
        list(, $int) = unpack('l', $this->maybeSwitchByteOrder($bytes));

        return $int;
    }

    /**
     * Decodes $size key/value pairs into an associative array.
     *
     * @param int $size   number of key/value pairs
     * @param int $offset offset of the first key
     *
     * @return array [map of decoded values, offset just past the last value]
     */
    private function decodeMap($size, $offset)
    {
        $map = [];

        for ($i = 0; $i < $size; ++$i) {
            list($key, $offset) = $this->decode($offset);
            list($value, $offset) = $this->decode($offset);
            $map[$key] = $value;
        }

        return [$map, $offset];
    }

    /**
     * Decodes a pointer field.
     *
     * Bits 3-4 of the control byte give the pointer size minus one. For
     * sizes 1 to 3 the low three bits of the control byte are the most
     * significant bits of the pointer value; for size 4 they are not used.
     *
     * @param int $ctrlByte the field's control byte
     * @param int $offset   offset of the first pointer byte
     *
     * @throws RuntimeException if the pointer does not fit in a native integer
     *                          and neither gmp nor bcmath is loaded
     *
     * @return array [pointer (int, or numeric string when big-integer math
     *               was needed), offset just past the pointer bytes]
     */
    private function decodePointer($ctrlByte, $offset)
    {
        $pointerSize = (($ctrlByte >> 3) & 0x3) + 1;

        $buffer = Util::read($this->fileStream, $offset, $pointerSize);
        $offset = $offset + $pointerSize;

        switch ($pointerSize) {
            case 1:
                $packed = (pack('C', $ctrlByte & 0x7)) . $buffer;
                list(, $pointer) = unpack('n', $packed);
                $pointer += $this->pointerBase;
                break;
            case 2:
                $packed = "\x00" . (pack('C', $ctrlByte & 0x7)) . $buffer;
                list(, $pointer) = unpack('N', $packed);
                $pointer += $this->pointerBase + 2048;
                break;
            case 3:
                $packed = (pack('C', $ctrlByte & 0x7)) . $buffer;

                // It is safe to use 'N' here, even on 32 bit machines as the
                // first bit is 0.
                list(, $pointer) = unpack('N', $packed);
                $pointer += $this->pointerBase + 526336;
                break;
            case 4:
                // We cannot use unpack here as we might overflow on 32 bit
                // machines
                $pointerOffset = $this->decodeUint($buffer, $pointerSize);

                $byteLength = $pointerSize + $this->pointerBaseByteSize;

                if ($byteLength <= _MM_MAX_INT_BYTES) {
                    $pointer = $pointerOffset + $this->pointerBase;
                } elseif (\extension_loaded('gmp')) {
                    $pointer = gmp_strval(gmp_add($pointerOffset, $this->pointerBase));
                } elseif (\extension_loaded('bcmath')) {
                    $pointer = bcadd($pointerOffset, $this->pointerBase);
                } else {
                    throw new RuntimeException(
                        'The gmp or bcmath extension must be installed to read this database.'
                    );
                }
        }

        return [$pointer, $offset];
    }

    /**
     * Decodes a big-endian unsigned integer of arbitrary byte length.
     *
     * @param string $bytes      payload bytes, most significant first
     * @param int    $byteLength number of payload bytes
     *
     * @throws RuntimeException if the value needs big-integer math and neither
     *                          gmp nor bcmath is loaded
     *
     * @return int|string a native int when $byteLength is within
     *                    _MM_MAX_INT_BYTES, otherwise a decimal string
     */
    private function decodeUint($bytes, $byteLength)
    {
        if ($byteLength === 0) {
            return 0;
        }

        $integer = 0;

        for ($i = 0; $i < $byteLength; ++$i) {
            $part = \ord($bytes[$i]);

            // We only use gmp or bcmath if the final value is too big
            if ($byteLength <= _MM_MAX_INT_BYTES) {
                $integer = ($integer << 8) + $part;
            } elseif (\extension_loaded('gmp')) {
                $integer = gmp_strval(gmp_add(gmp_mul($integer, 256), $part));
            } elseif (\extension_loaded('bcmath')) {
                $integer = bcadd(bcmul($integer, 256), $part);
            } else {
                throw new RuntimeException(
                    'The gmp or bcmath extension must be installed to read this database.'
                );
            }
        }

        return $integer;
    }

    /**
     * Resolves the payload size encoded in (and possibly after) a control byte.
     *
     * The low five bits hold the size directly when below 29. Otherwise 1, 2
     * or 3 following bytes hold an unsigned big-endian value that is added to
     * 29, 285 or 65821 respectively.
     *
     * @param int $ctrlByte the field's control byte
     * @param int $offset   offset of the byte following the control byte (and
     *                      the extended type byte, if any)
     *
     * @return array [payload size, offset just past the size bytes]
     */
    private function sizeFromCtrlByte($ctrlByte, $offset)
    {
        $size = $ctrlByte & 0x1f;

        if ($size < 29) {
            return [$size, $offset];
        }

        $bytesToRead = $size - 28;
        $bytes = Util::read($this->fileStream, $offset, $bytesToRead);

        if ($size === 29) {
            $size = 29 + \ord($bytes);
        } elseif ($size === 30) {
            list(, $adjust) = unpack('n', $bytes);
            $size = 285 + $adjust;
        } elseif ($size > 30) {
            // All three bytes are significant. The leading "\x00" pads them
            // to the 4 bytes 'N' needs and keeps $adjust within 24 bits, so no
            // mask is required. (A former mask of 0x0FFFFFFF >> 8 kept only
            // 20 bits and truncated sizes of 65821 + 2^20 and above.)
            list(, $adjust) = unpack('N', "\x00" . $bytes);
            $size = $adjust + 65821;
        }

        return [$size, $offset + $bytesToRead];
    }

    /**
     * Reverses the bytes when the platform is little endian, so that data
     * stored most-significant-byte first can be unpacked with machine-order
     * format codes.
     *
     * @param string $bytes
     *
     * @return string
     */
    private function maybeSwitchByteOrder($bytes)
    {
        return $this->switchByteOrder ? strrev($bytes) : $bytes;
    }

    /**
     * Detects the platform byte order by packing a short in machine order
     * ('S') and reading it back as little endian ('v').
     *
     * @return bool true when the machine byte order is little endian
     */
    private function isPlatformLittleEndian()
    {
        $testint = 0x00FF;
        $packed = pack('S', $testint);

        return $testint === current(unpack('v', $packed));
    }
}