<?php

namespace splitbrain\PHPArchive;

/**
 * Class Tar
 *
 * Creates or extracts Tar archives. Supports gz and bzip compression
 *
 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @package splitbrain\PHPArchive
 * @license MIT
 */
class Tar extends Archive
{

    /** @var string path of the archive file, empty when the archive is built in memory */
    protected $file = '';
    /** @var int one of the Archive::COMPRESS_* constants */
    protected $comptype = Archive::COMPRESS_AUTO;
    /** @var int compression level (-1 to 9) */
    protected $complevel = 9;
    /** @var resource|int open stream handle, 0 when no file is open */
    protected $fh;
    /** @var string archive data collected when no file name was given to create() */
    protected $memory = '';
    /** @var bool true while no archive is open for reading or writing */
    protected $closed = true;
    /** @var bool true when the archive was opened through create() */
    protected $writeaccess = false;

    /**
     * Sets the compression to use
     *
     * A level of 0 switches the compression type to COMPRESS_NONE, and a type of
     * COMPRESS_NONE resets the level to 0.
     *
     * @param int $level Compression level (-1 to 9)
     * @param int $type Type of compression to use (use COMPRESS_* constants)
     * @throws ArchiveIllegalCompressionException
     */
    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
    {
        $this->compressioncheck($type);
        if ($level < -1 || $level > 9) {
            throw new ArchiveIllegalCompressionException('Compression level should be between -1 and 9');
        }
        $this->comptype = $type;
        $this->complevel = $level;
        if ($level == 0) {
            $this->comptype = Archive::COMPRESS_NONE;
        }
        if ($type == Archive::COMPRESS_NONE) {
            $this->complevel = 0;
        }
    }

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @throws ArchiveIOException
     * @throws ArchiveIllegalCompressionException
     */
    public function open($file)
    {
        $this->file = $file;

        // update compression to match file
        if ($this->comptype == Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // open file handles
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if (!$this->fh) {
            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
        }
        $this->closed = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException
     * @return FileInfo[]
     */
    public function contents()
    {
        $result = array();

        foreach ($this->yieldContents() as $fileinfo) {
            $result[] = $fileinfo;
        }

        return $result;
    }

    /**
     * Read the contents of a TAR archive and return each entry using yield
     * for memory efficiency.
     *
     * The archive is closed once the generator has been consumed completely.
     *
     * @see contents()
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException
     * @return \Generator|FileInfo[]
     */
    public function yieldContents()
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        while ($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if (!is_array($header)) {
                continue; // null block
            }

            // file data is stored in full 512 byte blocks, jump over it
            $this->skipbytes(ceil($header['size'] / 512) * 512);
            yield $this->header2fileinfo($header);
        }

        $this->close();
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string $outdir the target directory for extracting
     * @param int|string $strip either the number of path components or a fixed prefix to strip
     * @param string $exclude a regular expression of files to exclude
     * @param string $include a regular expression of files to include
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException
     * @return FileInfo[]
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '')
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $outdir = rtrim($outdir, '/');
        @mkdir($outdir, 0777, true);
        if (!is_dir($outdir)) {
            throw new ArchiveIOException("Could not create directory '$outdir'");
        }

        $extracted = array();
        while ($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if (!is_array($header)) {
                continue;
            }
            $fileinfo = $this->header2fileinfo($header);

            // apply strip rules
            $fileinfo->strip($strip);

            // skip unwanted files
            if (!strlen($fileinfo->getPath()) || !$fileinfo->matchExpression($include, $exclude)) {
                $this->skipbytes(ceil($header['size'] / 512) * 512);
                continue;
            }

            // create output directory
            $output = $outdir.'/'.$fileinfo->getPath();
            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
            if (!file_exists($directory)) {
                mkdir($directory, 0777, true);
            }

            // extract data
            if (!$fileinfo->getIsdir()) {
                $fp = @fopen($output, "wb");
                if (!$fp) {
                    throw new ArchiveIOException('Could not open file for writing: '.$output);
                }

                // copy all complete 512 byte blocks first
                $size = floor($header['size'] / 512);
                for ($i = 0; $i < $size; $i++) {
                    fwrite($fp, $this->readbytes(512), 512);
                }
                // the last block is padded, only write the bytes that belong to the file
                if (($header['size'] % 512) != 0) {
                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
                }

                fclose($fp);
                @touch($output, $fileinfo->getMtime());
                @chmod($output, $fileinfo->getMode());
            } else {
                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
            }

            if (is_callable($this->callback)) {
                call_user_func($this->callback, $fileinfo);
            }
            $extracted[] = $fileinfo;
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @throws ArchiveIOException
     * @throws ArchiveIllegalCompressionException
     */
    public function create($file = '')
    {
        $this->file = $file;
        $this->memory = '';
        $this->fh = 0;

        if ($this->file) {
            // determine compression
            if ($this->comptype == Archive::COMPRESS_AUTO) {
                $this->setCompression($this->complevel, $this->filetype($file));
            }

            if ($this->comptype === Archive::COMPRESS_GZIP) {
                // only append a level digit for explicit levels. A level of -1 would
                // otherwise produce the mode 'wb-1' instead of asking for the default
                $mode = 'wb';
                if ($this->complevel >= 0) {
                    $mode .= $this->complevel;
                }
                $this->fh = @gzopen($this->file, $mode);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if (!$this->fh) {
                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
            }
        }
        $this->writeaccess = true;
        $this->closed = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @param string $file path to the original file
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with all meta data, empty to take from original
     * @throws ArchiveCorruptedException when the file changes while reading it, the archive will be corrupt and should be deleted
     * @throws ArchiveIOException there was trouble reading the given file, it was not added
     * @throws FileInfoException trouble reading file info, it was not added
     */
    public function addFile($file, $fileinfo = '')
    {
        if (is_string($fileinfo)) {
            $fileinfo = FileInfo::fromPath($file, $fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        // only open the source when there is data to copy.
        // note: on Windows fopen() on a directory will fail, so we prevent
        // errors on Windows by testing if we have data to write.
        $hasData = !$fileinfo->getIsdir() && $fileinfo->getSize() > 0;

        // open the source before anything is written, so an unreadable file
        // does not leave a header without data in the archive
        $fp = null;
        if ($hasData) {
            $fp = @fopen($file, 'rb');
            if (!$fp) {
                throw new ArchiveIOException('Could not open file for reading: ' . $file);
            }
        }

        $read = 0;
        try {
            // create file header
            $this->writeFileHeader($fileinfo);

            if ($fp) {
                while (!feof($fp)) {
                    $data = fread($fp, 512);
                    if ($data === false || $data === '') {
                        break;
                    }
                    $read += strlen($data);
                    // every block is padded with null bytes to 512 bytes
                    $this->writebytes(pack("a512", $data));
                }
            }
        } finally {
            // release the source handle even when writing to the archive failed
            if ($fp) {
                fclose($fp);
            }
        }

        if ($hasData && $read != $fileinfo->getSize()) {
            $this->close();
            throw new ArchiveCorruptedException("The size of $file changed while reading, archive corrupted. read $read expected ".$fileinfo->getSize());
        }

        if (is_callable($this->callback)) {
            call_user_func($this->callback, $fileinfo);
        }
    }

    /**
     * Add a file to the current TAR archive using the given $data as content
     *
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with all meta data
     * @param string $data binary content of the file to add
     * @throws ArchiveIOException
     */
    public function addData($fileinfo, $data)
    {
        if (is_string($fileinfo)) {
            $fileinfo = new FileInfo($fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $len = strlen($data);
        $fileinfo->setSize($len);
        $this->writeFileHeader($fileinfo);

        // write the data in null padded 512 byte blocks
        for ($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }

        if (is_callable($this->callback)) {
            call_user_func($this->callback, $fileinfo);
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     * @throws ArchiveIOException
     */
    public function close()
    {
        if ($this->closed) {
            return;
        } // we did this already

        // write footer
        if ($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles
        if ($this->file) {
            if ($this->comptype === Archive::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh = 0;
        }

        $this->writeaccess = false;
        $this->closed = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive
     *
     * @throws ArchiveIOException
     * @return string the archive data, compressed according to the configured compression type
     */
    public function getArchive()
    {
        $this->close();

        // without a file name there is nothing to guess the compression from
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->comptype = Archive::COMPRESS_NONE;
        }

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return gzencode($this->memory, $this->complevel);
        }
        if ($this->comptype === Archive::COMPRESS_BZIP) {
            return bzcompress($this->memory);
        }
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory effective to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @throws ArchiveIOException
     * @throws ArchiveIllegalCompressionException
     */
    public function save($file)
    {
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // compare against false explicitly: writing zero bytes is not an error
        if (@file_put_contents($file, $this->getArchive()) === false) {
            throw new ArchiveIOException('Could not write to file: '.$file);
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string
     */
    protected function readbytes($length)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws ArchiveIOException
     * @return int number of bytes written
     */
    protected function writebytes($data)
    {
        if (!$this->file) {
            // no file given to create(), collect everything in memory
            $this->memory .= $data;
            $written = strlen($data);
        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if ($written === false) {
            throw new ArchiveIOException('Failed to write to archive stream');
        }
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip, relative to the current position
     */
    protected function skipbytes($bytes)
    {
        // callers pass the result of ceil(), which is a float
        $bytes = (int) $bytes;

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on
            // bzread allows to read a max of 8kb at once
            while ($bytes > 0) {
                $toread = min(8192, $bytes);
                @bzread($this->fh, $toread);
                $bytes -= $toread;
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write the given file meta data as header
     *
     * @param FileInfo $fileinfo
     * @throws ArchiveIOException
     */
    protected function writeFileHeader(FileInfo $fileinfo)
    {
        $this->writeRawFileHeader(
            $fileinfo->getPath(),
            $fileinfo->getUid(),
            $fileinfo->getGid(),
            $fileinfo->getMode(),
            $fileinfo->getSize(),
            $fileinfo->getMtime(),
            $fileinfo->getIsdir() ? '5' : '0'
        );
    }

    /**
     * Write a file header to the stream
     *
     * @param string $name
     * @param int $uid
     * @param int $gid
     * @param int $perm
     * @param int $size
     * @param int $mtime
     * @param string $typeflag Set to '5' for directories
     * @throws ArchiveIOException
     */
    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
    {
        // handle filename length restrictions
        $prefix = '';
        $namelen = strlen($name);
        if ($namelen > 100) {
            $file = basename($name);
            $dir = dirname($name);
            if (strlen($file) > 100 || strlen($dir) > 155) {
                // we're still too large, let's use GNU longlink
                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
                for ($s = 0; $s < $namelen; $s += 512) {
                    $this->writebytes(pack("a512", substr($name, $s, 512)));
                }
                $name = substr($name, 0, 100); // cut off name
            } else {
                // we're fine when splitting, use POSIX ustar
                $prefix = $dir;
                $name = $file;
            }
        }

        // values are needed in octal
        $uid = sprintf("%6s ", decoct($uid));
        $gid = sprintf("%6s ", decoct($gid));
        $perm = sprintf("%6s ", decoct($perm));
        $size = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));

        // the header is built in two parts, the 8 byte checksum field goes in between
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        $data_last = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");

        // sum up the 148 bytes in front of the checksum field
        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($data_first[$i]);
        }

        // the checksum field itself is counted as eight spaces (8 * 32 = 256),
        // then the remaining bytes up to 512 are added
        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
            $chks += ord($data_last[$j]);
        }

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * For GNU Long-Link entries the long file name and the header following it are
     * read from the stream as well and the header of the real entry is returned.
     *
     * @param string $block a 512 byte block containing the header data
     * @return array|false returns false when this was a null block
     * @throws ArchiveCorruptedException
     */
    protected function parseHeader($block)
    {
        if (!$block || strlen($block) != 512) {
            throw new ArchiveCorruptedException('Unexpected length of header');
        }

        // null byte blocks are ignored
        if (trim($block) === '') {
            return false;
        }

        // calculate the checksum the same way writeRawFileHeader() does:
        // all bytes of the block, with the checksum field counted as eight spaces
        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($block[$i]);
        }

        for ($i = 156, $chks += 256; $i < 512; $i++) {
            $chks += ord($block[$i]);
        }

        $header = @unpack(
            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
            $block
        );
        if (!$header) {
            throw new ArchiveCorruptedException('Failed to parse header');
        }

        $return = array();
        $return['checksum'] = OctDec(trim($header['checksum']));
        if ($return['checksum'] != $chks) {
            throw new ArchiveCorruptedException('Header does not match its checksum');
        }

        $return['filename'] = trim($header['filename']);
        $return['perm'] = OctDec(trim($header['perm']));
        $return['uid'] = OctDec(trim($header['uid']));
        $return['gid'] = OctDec(trim($header['gid']));
        $return['size'] = OctDec(trim($header['size']));
        $return['mtime'] = OctDec(trim($header['mtime']));
        $return['typeflag'] = $header['typeflag'];
        $return['link'] = trim($header['link']);
        $return['uname'] = trim($header['uname']);
        $return['gname'] = trim($header['gname']);

        // Handle ustar Posix compliant path prefixes
        if (trim($header['prefix'])) {
            $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
        }

        // Handle Long-Link entries from GNU Tar
        if ($return['typeflag'] == 'L') {
            // following data block(s) is the filename
            $filename = trim($this->readbytes(ceil($return['size'] / 512) * 512));
            // next block is the real header
            $block = $this->readbytes(512);
            $return = $this->parseHeader($block);
            if (!is_array($return)) {
                // a null block can not carry the meta data for the long name
                throw new ArchiveCorruptedException('Long-Link entry is not followed by a file header');
            }
            // overwrite the filename
            $return['filename'] = $filename;
        }

        return $return;
    }

    /**
     * Creates a FileInfo object from the given parsed header
     *
     * @param array $header a header as returned by parseHeader()
     * @return FileInfo
     */
    protected function header2fileinfo($header)
    {
        $fileinfo = new FileInfo();
        $fileinfo->setPath($header['filename']);
        $fileinfo->setMode($header['perm']);
        $fileinfo->setUid($header['uid']);
        $fileinfo->setGid($header['gid']);
        $fileinfo->setSize($header['size']);
        $fileinfo->setMtime($header['mtime']);
        $fileinfo->setOwner($header['uname']);
        $fileinfo->setGroup($header['gname']);
        // '5' is the type flag writeFileHeader() uses for directories. A plain bool
        // cast would also flag every other non-'0' type (links, null byte) as directory
        $fileinfo->setIsdir($header['typeflag'] === '5');

        return $fileinfo;
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype one of the Archive::COMPRESS_* constants
     * @throws ArchiveIllegalCompressionException
     */
    protected function compressioncheck($comptype)
    {
        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw new ArchiveIllegalCompressionException('No gzip support available');
        }

        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw new ArchiveIllegalCompressionException('No bzip2 support available');
        }
    }

    /**
     * Guesses the wanted compression from the given file
     *
     * Uses magic bytes for existing files, the file extension otherwise
     *
     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int
     */
    public function filetype($file)
    {
        // for existing files, try to read the magic bytes
        if (file_exists($file) && is_readable($file) && filesize($file) > 5) {
            $fh = @fopen($file, 'rb');
            // when the file can not be opened we fall back to the file name below
            if ($fh) {
                $magic = (string) fread($fh, 5);
                fclose($fh);

                if (strpos($magic, "\x42\x5a") === 0) {
                    return Archive::COMPRESS_BZIP;
                }
                if (strpos($magic, "\x1f\x8b") === 0) {
                    return Archive::COMPRESS_GZIP;
                }
            }
        }

        // otherwise rely on file name
        $file = strtolower($file);
        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            return Archive::COMPRESS_GZIP;
        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            return Archive::COMPRESS_BZIP;
        }

        return Archive::COMPRESS_NONE;
    }

}