<?php

namespace splitbrain\PHPArchive;

/**
 * Class Tar
 *
 * Creates or extracts Tar archives. Supports gz and bzip compression
 *
 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @package splitbrain\PHPArchive
 * @license MIT
 */
class Tar extends Archive
{

    /** @var string path of the archive on disk; empty when working in memory or after close() */
    protected $file = '';
    /** @var int one of the Archive::COMPRESS_* constants */
    protected $comptype = Archive::COMPRESS_AUTO;
    /** @var int compression level (0 to 9) */
    protected $complevel = 9;
    /** @var resource|int handle returned by fopen/gzopen/bzopen, 0 when no file is open */
    protected $fh;
    /** @var string buffer holding the uncompressed archive when created in memory */
    protected $memory = '';
    /** @var bool true until open() or create() succeeded, true again after close() */
    protected $closed = true;
    /** @var bool true while the archive is being created (set by create(), reset by close()) */
    protected $writeaccess = false;

    /**
     * Sets the compression to use
     *
     * A level of 0 forces COMPRESS_NONE, and COMPRESS_NONE forces the level to 0.
     *
     * @param int $level Compression level (0 to 9)
     * @param int $type  Type of compression to use (use COMPRESS_* constants)
     * @throws ArchiveIllegalCompressionException when the requested compression is not available
     * @return void
     */
    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
    {
        $this->compressioncheck($type);
        $this->comptype  = $type;
        $this->complevel = $level;
        if ($level == 0) {
            $this->comptype = Archive::COMPRESS_NONE;
        }
        if ($type == Archive::COMPRESS_NONE) {
            $this->complevel = 0;
        }
    }

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function open($file)
    {
        $this->file = $file;

        // update compression to match file
        if ($this->comptype == Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // open file handles
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if (!$this->fh) {
            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
        }
        $this->closed = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException
     * @return FileInfo[]
     */
    public function contents()
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $result = array();
        while ($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if (!is_array($header)) {
                continue; // null block
            }

            // file data is stored in full 512 byte blocks, jump over all of them
            $this->skipbytes(ceil($header['size'] / 512) * 512);
            $result[] = $this->header2fileinfo($header);
        }

        $this->close();
        return $result;
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string     $outdir  the target directory for extracting
     * @param int|string $strip   either the number of path components or a fixed prefix to strip
     * @param string     $exclude a regular expression of files to exclude
     * @param string     $include a regular expression of files to include
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException
     * @return FileInfo[]
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '')
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $outdir = rtrim($outdir, '/');
        @mkdir($outdir, 0777, true);
        if (!is_dir($outdir)) {
            throw new ArchiveIOException("Could not create directory '$outdir'");
        }

        $extracted = array();
        while ($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if (!is_array($header)) {
                continue; // null block
            }
            $fileinfo = $this->header2fileinfo($header);

            // apply strip rules
            $fileinfo->strip($strip);

            // skip unwanted files
            if (!strlen($fileinfo->getPath()) || !$fileinfo->match($include, $exclude)) {
                $this->skipbytes(ceil($header['size'] / 512) * 512);
                continue;
            }

            // create output directory
            // NOTE(review): the archive supplied path is appended as is; presumably FileInfo normalises
            // it (e.g. removes '..' components) - verify before extracting untrusted archives
            $output    = $outdir.'/'.$fileinfo->getPath();
            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
            @mkdir($directory, 0777, true);

            // extract data
            if (!$fileinfo->getIsdir()) {
                $fp = fopen($output, "wb");
                if (!$fp) {
                    throw new ArchiveIOException('Could not open file for writing: '.$output);
                }

                // copy all complete blocks first ...
                $size = floor($header['size'] / 512);
                for ($i = 0; $i < $size; $i++) {
                    fwrite($fp, $this->readbytes(512), 512);
                }
                // ... then the used part of the last, zero padded block
                if (($header['size'] % 512) != 0) {
                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
                }

                fclose($fp);
                touch($output, $fileinfo->getMtime());
                chmod($output, $fileinfo->getMode());
            } else {
                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
            }

            $extracted[] = $fileinfo;
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function create($file = '')
    {
        $this->file   = $file;
        $this->memory = '';
        $this->fh     = 0;

        if ($this->file) {
            // determine compression
            if ($this->comptype == Archive::COMPRESS_AUTO) {
                $this->setCompression($this->complevel, $this->filetype($file));
            }

            if ($this->comptype === Archive::COMPRESS_GZIP) {
                $this->fh = @gzopen($this->file, 'wb'.$this->complevel);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if (!$this->fh) {
                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
            }
        }
        $this->writeaccess = true;
        $this->closed      = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @param string          $file     path to the original file
     * @param string|FileInfo $fileinfo either the name to use in the archive (string) or a FileInfo object with
     *                                  all meta data, empty to take from original
     * @throws ArchiveIOException
     */
    public function addFile($file, $fileinfo = '')
    {
        if (is_string($fileinfo)) {
            $fileinfo = FileInfo::fromPath($file, $fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $fp = fopen($file, 'rb');
        if (!$fp) {
            throw new ArchiveIOException('Could not open file for reading: '.$file);
        }

        try {
            // create file header
            $this->writeFileHeader($fileinfo);

            // write data in zero padded 512 byte blocks
            while (!feof($fp)) {
                $data = fread($fp, 512);
                if ($data === false || $data === '') {
                    break;
                }
                $this->writebytes(pack("a512", $data));
            }
        } catch (\Exception $e) {
            // do not leak the source handle when writing to the archive fails
            fclose($fp);
            throw $e;
        }
        fclose($fp);
    }

    /**
     * Add a file to the current TAR archive using the given $data as content
     *
     * @param string|FileInfo $fileinfo either the name to use in the archive (string) or a FileInfo object with
     *                                  all meta data
     * @param string          $data     binary content of the file to add
     * @throws ArchiveIOException
     */
    public function addData($fileinfo, $data)
    {
        if (is_string($fileinfo)) {
            $fileinfo = new FileInfo($fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $len = strlen($data);
        $fileinfo->setSize($len);
        $this->writeFileHeader($fileinfo);

        // write data in zero padded 512 byte blocks
        for ($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     * @throws ArchiveIOException when the footer can not be written
     */
    public function close()
    {
        if ($this->closed) {
            return;
        } // we did this already

        // write footer
        if ($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles
        if ($this->file) {
            if ($this->comptype === Archive::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh   = 0;
        }

        $this->writeaccess = false;
        $this->closed      = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive
     *
     * @return string the archive, compressed according to the configured compression type
     */
    public function getArchive()
    {
        $this->close();

        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->comptype = Archive::COMPRESS_NONE;
        }

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            // gzencode() creates a real gzip file (magic bytes 1f 8b) as read by gzopen() and detected
            // by filetype(); gzcompress() would only produce a raw zlib stream
            return gzencode($this->memory, $this->complevel);
        }
        if ($this->comptype === Archive::COMPRESS_BZIP) {
            return bzcompress($this->memory);
        }
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory effective to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function save($file)
    {
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // only false signals a failure, 0 is a successful write of an empty string
        if (file_put_contents($file, $this->getArchive()) === false) {
            throw new ArchiveIOException('Could not write to file: '.$file);
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string
     */
    protected function readbytes($length)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws ArchiveIOException
     * @return int number of bytes written
     */
    protected function writebytes($data)
    {
        if (!$this->file) {
            // no file given, the archive is built in memory
            $this->memory .= $data;
            $written = strlen($data);
        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if ($written === false) {
            throw new ArchiveIOException('Failed to write to archive stream');
        }
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip, relative to the current position
     */
    public function skipbytes($bytes)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on
            // bzread allows to read a max of 8kb at once
            while ($bytes > 0) {
                $toread = min(8192, $bytes);
                @bzread($this->fh, $toread);
                $bytes -= $toread;
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write the given file meta data as header
     *
     * @param FileInfo $fileinfo
     * @throws ArchiveIOException
     */
    protected function writeFileHeader(FileInfo $fileinfo)
    {
        $this->writeRawFileHeader(
            $fileinfo->getPath(),
            $fileinfo->getUid(),
            $fileinfo->getGid(),
            $fileinfo->getMode(),
            $fileinfo->getSize(),
            $fileinfo->getMtime(),
            $fileinfo->getIsdir() ? '5' : '0'
        );
    }

    /**
     * Write a file header to the stream
     *
     * Names longer than 100 bytes are split into ustar prefix and name when possible, otherwise a
     * GNU '././@LongLink' entry carrying the full name is written in front of the real header.
     *
     * @param string $name
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $size
     * @param int    $mtime
     * @param string $typeflag Set to '5' for directories
     * @throws ArchiveIOException
     */
    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
    {
        // handle filename length restrictions
        $prefix  = '';
        $namelen = strlen($name);
        if ($namelen > 100) {
            $file = basename($name);
            $dir  = dirname($name);
            if (strlen($file) > 100 || strlen($dir) > 155) {
                // we're still too large, let's use GNU longlink
                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
                for ($s = 0; $s < $namelen; $s += 512) {
                    $this->writebytes(pack("a512", substr($name, $s, 512)));
                }
                $name = substr($name, 0, 100); // cut off name
            } else {
                // we're fine when splitting, use POSIX ustar
                $prefix = $dir;
                $name   = $file;
            }
        }

        // values are needed in octal
        $uid   = sprintf("%6s ", decoct($uid));
        $gid   = sprintf("%6s ", decoct($gid));
        $perm  = sprintf("%6s ", decoct($perm));
        $size  = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));

        // bytes 0-147 (everything before the checksum) and bytes 156-511 (everything after it)
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");

        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($data_first[$i]);
        }

        // the 8 checksum bytes themselves are counted as spaces: 8 * 32 = 256
        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
            $chks += ord($data_last[$j]);
        }

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * For GNU Long-Link entries the long name and the following real header are read from the
     * stream as well, the returned data then describes the real entry.
     *
     * @param string $block a 512 byte block containing the header data
     * @return array|false returns false when this was a null block
     * @throws ArchiveCorruptedException
     */
    protected function parseHeader($block)
    {
        if (!$block || strlen($block) != 512) {
            throw new ArchiveCorruptedException('Unexpected length of header');
        }

        // null byte blocks are ignored
        if (trim($block) === '') {
            return false;
        }

        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($block[$i]);
        }

        // the 8 checksum bytes themselves are counted as spaces: 8 * 32 = 256
        for ($i = 156, $chks += 256; $i < 512; $i++) {
            $chks += ord($block[$i]);
        }

        $header = @unpack(
            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
            $block
        );
        if (!$header) {
            throw new ArchiveCorruptedException('Failed to parse header');
        }

        $return = array();

        $return['checksum'] = octdec(trim($header['checksum']));
        if ($return['checksum'] != $chks) {
            throw new ArchiveCorruptedException('Header does not match it\'s checksum');
        }

        $return['filename'] = trim($header['filename']);
        $return['perm']     = octdec(trim($header['perm']));
        $return['uid']      = octdec(trim($header['uid']));
        $return['gid']      = octdec(trim($header['gid']));
        $return['size']     = octdec(trim($header['size']));
        $return['mtime']    = octdec(trim($header['mtime']));
        $return['typeflag'] = $header['typeflag'];
        $return['link']     = trim($header['link']);
        $return['uname']    = trim($header['uname']);
        $return['gname']    = trim($header['gname']);

        // Handle ustar Posix compliant path prefixes
        if (trim($header['prefix'])) {
            $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
        }

        // Handle Long-Link entries from GNU Tar
        if ($return['typeflag'] == 'L') {
            // following data block(s) is the filename
            // the decoded size has to be used here: $header['size'] is still the octal text and would
            // be misread as a decimal number, reading too many blocks for longer names
            $filename = trim($this->readbytes((int) (ceil($return['size'] / 512) * 512)));
            // next block is the real header
            $block  = $this->readbytes(512);
            $return = $this->parseHeader($block);
            if (!is_array($return)) {
                throw new ArchiveCorruptedException('Long-Link entry is not followed by a file header');
            }
            // overwrite the filename
            $return['filename'] = $filename;
        }

        return $return;
    }

    /**
     * Creates a FileInfo object from the given parsed header
     *
     * @param array $header header data as returned by parseHeader()
     * @return FileInfo
     */
    protected function header2fileinfo($header)
    {
        $fileinfo = new FileInfo();
        $fileinfo->setPath($header['filename']);
        $fileinfo->setMode($header['perm']);
        $fileinfo->setUid($header['uid']);
        $fileinfo->setGid($header['gid']);
        $fileinfo->setSize($header['size']);
        $fileinfo->setMtime($header['mtime']);
        $fileinfo->setOwner($header['uname']);
        $fileinfo->setGroup($header['gname']);
        // NOTE(review): every typeflag except '0' and '' casts to true, so links and other special
        // entries are reported as directories as well - confirm whether only '5' should count
        $fileinfo->setIsdir((bool) $header['typeflag']);

        return $fileinfo;
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype one of the Archive::COMPRESS_* constants
     * @throws ArchiveIllegalCompressionException
     */
    protected function compressioncheck($comptype)
    {
        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw new ArchiveIllegalCompressionException('No gzip support available');
        }

        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw new ArchiveIllegalCompressionException('No bzip2 support available');
        }
    }

    /**
     * Guesses the wanted compression from the given file
     *
     * Uses magic bytes for existing files, the file extension otherwise
     *
     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int one of Archive::COMPRESS_GZIP, Archive::COMPRESS_BZIP or Archive::COMPRESS_NONE
     */
    public function filetype($file)
    {
        // for existing files, try to read the magic bytes
        if (file_exists($file) && is_readable($file) && filesize($file) > 5) {
            $fh = fopen($file, 'rb');
            if ($fh) {
                $magic = (string) fread($fh, 5);
                fclose($fh);

                if (strpos($magic, "\x42\x5a") === 0) {
                    return Archive::COMPRESS_BZIP;
                }
                if (strpos($magic, "\x1f\x8b") === 0) {
                    return Archive::COMPRESS_GZIP;
                }
            }
            // when the file could not be opened we fall through to the name based guess
        }

        // otherwise rely on file name
        $file = strtolower($file);
        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            return Archive::COMPRESS_GZIP;
        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            return Archive::COMPRESS_BZIP;
        }

        return Archive::COMPRESS_NONE;
    }
}