<?php

namespace splitbrain\PHPArchive;

/**
 * Class Tar
 *
 * Creates or extracts Tar archives. Supports gz and bzip compression
 *
 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @package splitbrain\PHPArchive
 * @license MIT
 */
class Tar extends Archive
{

    /** @var string path of the archive file; empty when the archive is built in memory */
    protected $file = '';
    /** @var int one of the Archive::COMPRESS_* constants */
    protected $comptype = Archive::COMPRESS_AUTO;
    /** @var int compression level (-1 to 9) */
    protected $complevel = 9;
    /** @var resource|int the open (gz/bz/plain) file handle, 0 when none is open */
    protected $fh;
    /** @var string raw (uncompressed) archive data when working in memory */
    protected $memory = '';
    /** @var bool true while no archive is open for reading or writing */
    protected $closed = true;
    /** @var bool true when the archive was opened through create() */
    protected $writeaccess = false;

    /**
     * Sets the compression to use
     *
     * A level of 0 switches the compression type to COMPRESS_NONE and a type of
     * COMPRESS_NONE forces the level to 0, so both values always agree.
     *
     * @param int $level Compression level (-1 to 9, 0 disables compression)
     * @param int $type Type of compression to use (use COMPRESS_* constants)
     * @throws ArchiveIllegalCompressionException
     */
    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
    {
        $this->compressioncheck($type);
        if ($level < -1 || $level > 9) {
            throw new ArchiveIllegalCompressionException('Compression level should be between -1 and 9');
        }
        $this->comptype = $type;
        $this->complevel = $level;
        if ($level == 0) {
            $this->comptype = Archive::COMPRESS_NONE;
        }
        if ($type == Archive::COMPRESS_NONE) {
            $this->complevel = 0;
        }
    }

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @throws ArchiveIOException
     * @throws ArchiveIllegalCompressionException
     */
    public function open($file)
    {
        $this->file = $file;

        // update compression to match file
        if ($this->comptype == Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // open file handles
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if (!$this->fh) {
            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
        }
        $this->closed = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException
     * @return FileInfo[]
     */
    public function contents()
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $result = array();
        while ($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if (!is_array($header)) {
                continue; // null block, nothing to list
            }

            // file data is stored in full 512 byte blocks, jump over it
            $this->skipbytes(ceil($header['size'] / 512) * 512);
            $result[] = $this->header2fileinfo($header);
        }

        $this->close();
        return $result;
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string $outdir the target directory for extracting
     * @param int|string $strip either the number of path components or a fixed prefix to strip
     * @param string $exclude a regular expression of files to exclude
     * @param string $include a regular expression of files to include
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException when a header is invalid or the archive ends in the middle of a file
     * @return FileInfo[]
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '')
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $outdir = rtrim($outdir, '/');
        @mkdir($outdir, 0777, true);
        if (!is_dir($outdir)) {
            throw new ArchiveIOException("Could not create directory '$outdir'");
        }

        $extracted = array();
        while ($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if (!is_array($header)) {
                continue;
            }
            $fileinfo = $this->header2fileinfo($header);

            // apply strip rules
            $fileinfo->strip($strip);

            // skip unwanted files
            if (!strlen($fileinfo->getPath()) || !$fileinfo->matchExpression($include, $exclude)) {
                $this->skipbytes(ceil($header['size'] / 512) * 512);
                continue;
            }

            // create output directory
            $output = $outdir.'/'.$fileinfo->getPath();
            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
            @mkdir($directory, 0777, true);

            // extract data
            if (!$fileinfo->getIsdir()) {
                $fp = @fopen($output, "wb");
                if (!$fp) {
                    throw new ArchiveIOException('Could not open file for writing: '.$output);
                }

                // Data is stored in 512 byte blocks, the last one is padded. Always consume
                // a whole block but only write the bytes that belong to the file.
                $remaining = $header['size'];
                while ($remaining > 0) {
                    $want = min(512, $remaining);
                    $chunk = $this->readbytes(512);
                    if ($chunk === false || strlen($chunk) < $want) {
                        // the archive ended before the announced file size was reached
                        fclose($fp);
                        throw new ArchiveCorruptedException(
                            'Unexpected end of archive while extracting: '.$fileinfo->getPath()
                        );
                    }
                    fwrite($fp, $chunk, $want);
                    $remaining -= $want;
                }

                fclose($fp);
                @touch($output, $fileinfo->getMtime());
                @chmod($output, $fileinfo->getMode());
            } else {
                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
            }

            if (is_callable($this->callback)) {
                call_user_func($this->callback, $fileinfo);
            }
            $extracted[] = $fileinfo;
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @throws ArchiveIOException
     * @throws ArchiveIllegalCompressionException
     */
    public function create($file = '')
    {
        $this->file = $file;
        $this->memory = '';
        $this->fh = 0;

        if ($this->file) {
            // determine compression
            if ($this->comptype == Archive::COMPRESS_AUTO) {
                $this->setCompression($this->complevel, $this->filetype($file));
            }

            if ($this->comptype === Archive::COMPRESS_GZIP) {
                // Only digits are understood as level in the gzopen mode string. Appending -1
                // would yield 'wb-1' which zlib reads as level 1, so the level is left out
                // in that case and zlib picks its default.
                $mode = 'wb';
                if ($this->complevel >= 0) {
                    $mode .= $this->complevel;
                }
                $this->fh = @gzopen($this->file, $mode);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if (!$this->fh) {
                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
            }
        }
        $this->writeaccess = true;
        $this->closed = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @param string $file path to the original file
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with all meta data, empty to take from original
     * @throws ArchiveCorruptedException when the file changes while reading it, the archive will be corrupt and should be deleted
     * @throws ArchiveIOException there was trouble reading the given file, it was not added
     * @throws FileInfoException trouble reading file info, it was not added
     */
    public function addFile($file, $fileinfo = '')
    {
        if (is_string($fileinfo)) {
            $fileinfo = FileInfo::fromPath($file, $fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        // Only open the source if we have data to write.
        // note: on Windows fopen() on a directory will fail, so we prevent
        // errors on Windows by testing if we have data to write.
        $hasData = !$fileinfo->getIsdir() && $fileinfo->getSize() > 0;
        $fp = null;
        if ($hasData) {
            // open the source BEFORE writing the header, otherwise an unreadable file
            // would leave a header without data in the archive
            $fp = @fopen($file, 'rb');
            if (!$fp) {
                throw new ArchiveIOException('Could not open file for reading: ' . $file);
            }
        }

        // create file header
        $this->writeFileHeader($fileinfo);

        if ($hasData) {
            $read = 0;
            while (!feof($fp)) {
                $data = fread($fp, 512);
                if ($data === false || $data === '') {
                    break;
                }
                $read += strlen($data);
                // pad every chunk to a full 512 byte block
                $this->writebytes(pack("a512", $data));
            }
            fclose($fp);

            // the header already announced the size, a mismatch makes the archive unusable
            if ($read != $fileinfo->getSize()) {
                $this->close();
                throw new ArchiveCorruptedException("The size of $file changed while reading, archive corrupted. read $read expected ".$fileinfo->getSize());
            }
        }

        if (is_callable($this->callback)) {
            call_user_func($this->callback, $fileinfo);
        }
    }

    /**
     * Add a file to the current TAR archive using the given $data as content
     *
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with all meta data
     * @param string $data binary content of the file to add
     * @throws ArchiveIOException
     */
    public function addData($fileinfo, $data)
    {
        if (is_string($fileinfo)) {
            $fileinfo = new FileInfo($fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        // the size in the header always reflects the given data
        $len = strlen($data);
        $fileinfo->setSize($len);
        $this->writeFileHeader($fileinfo);

        // write the data in 512 byte blocks, the last one is null padded
        for ($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }

        if (is_callable($this->callback)) {
            call_user_func($this->callback, $fileinfo);
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     * @throws ArchiveIOException
     */
    public function close()
    {
        if ($this->closed) {
            return;
        } // we did this already

        // write footer
        if ($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles (in-memory archives have no handle)
        if ($this->file) {
            if ($this->comptype === Archive::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh = 0;
        }

        $this->writeaccess = false;
        $this->closed = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive
     *
     * @return string the archive data, compressed according to the configured compression type
     * @throws ArchiveIOException
     */
    public function getArchive()
    {
        $this->close();

        // without a file name there is nothing to guess the compression from
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->comptype = Archive::COMPRESS_NONE;
        }

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return gzencode($this->memory, $this->complevel);
        }
        if ($this->comptype === Archive::COMPRESS_BZIP) {
            return bzcompress($this->memory);
        }
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory efficient to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @throws ArchiveIOException
     * @throws ArchiveIllegalCompressionException
     */
    public function save($file)
    {
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // file_put_contents() returns the number of bytes written, only false signals an error
        if (@file_put_contents($file, $this->getArchive()) === false) {
            throw new ArchiveIOException('Could not write to file: '.$file);
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string|false the data read; whatever the underlying gzread/bzread/fread call returned
     */
    protected function readbytes($length)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws ArchiveIOException
     * @return int number of bytes written
     */
    protected function writebytes($data)
    {
        if (!$this->file) {
            // no file given, collect everything in memory
            $this->memory .= $data;
            $written = strlen($data);
        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if ($written === false) {
            throw new ArchiveIOException('Failed to write to archive stream');
        }
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip, relative to the current position
     */
    protected function skipbytes($bytes)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on
            // bzread allows to read a max of 8kb at once
            while ($bytes) {
                $toread = min(8192, $bytes);
                @bzread($this->fh, $toread);
                $bytes -= $toread;
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write the given file meta data as header
     *
     * @param FileInfo $fileinfo
     * @throws ArchiveIOException
     */
    protected function writeFileHeader(FileInfo $fileinfo)
    {
        $this->writeRawFileHeader(
            $fileinfo->getPath(),
            $fileinfo->getUid(),
            $fileinfo->getGid(),
            $fileinfo->getMode(),
            $fileinfo->getSize(),
            $fileinfo->getMtime(),
            $fileinfo->getIsdir() ? '5' : '0'
        );
    }

    /**
     * Write a file header to the stream
     *
     * Names longer than 100 bytes are split into ustar prefix and name when possible. Otherwise
     * a GNU '././@LongLink' entry carrying the full name is written in front of the real header.
     *
     * @param string $name
     * @param int $uid
     * @param int $gid
     * @param int $perm
     * @param int $size
     * @param int $mtime
     * @param string $typeflag Set to '5' for directories
     * @throws ArchiveIOException
     */
    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
    {
        // handle filename length restrictions
        $prefix = '';
        $namelen = strlen($name);
        if ($namelen > 100) {
            $file = basename($name);
            $dir = dirname($name);
            if (strlen($file) > 100 || strlen($dir) > 155) {
                // we're still too large, let's use GNU longlink
                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
                for ($s = 0; $s < $namelen; $s += 512) {
                    $this->writebytes(pack("a512", substr($name, $s, 512)));
                }
                $name = substr($name, 0, 100); // cut off name
            } else {
                // we're fine when splitting, use POSIX ustar
                $prefix = $dir;
                $name = $file;
            }
        }

        // values are needed in octal
        $uid = sprintf("%6s ", decoct($uid));
        $gid = sprintf("%6s ", decoct($gid));
        $perm = sprintf("%6s ", decoct($perm));
        $size = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));

        // bytes 0-147: everything in front of the checksum field
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        // bytes 156-511: everything behind the checksum field
        $data_last = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");

        // the checksum is the sum of all header bytes ...
        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($data_first[$i]);
        }

        // ... where the 8 bytes of the checksum field itself count as spaces (8 * 32 = 256)
        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
            $chks += ord($data_last[$j]);
        }

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * For GNU LongLink entries the following data blocks and the real header are read
     * from the stream as well, the returned header then carries the long file name.
     *
     * @param string $block a 512 byte block containing the header data
     * @return array|false returns false when this was a null block
     * @throws ArchiveCorruptedException
     */
    protected function parseHeader($block)
    {
        if (!$block || strlen($block) != 512) {
            throw new ArchiveCorruptedException('Unexpected length of header');
        }

        // null byte blocks are ignored
        if (trim($block) === '') {
            return false;
        }

        // sum up all bytes, the checksum field (148-155) is counted as 8 spaces
        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($block[$i]);
        }

        for ($i = 156, $chks += 256; $i < 512; $i++) {
            $chks += ord($block[$i]);
        }

        $header = @unpack(
            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
            $block
        );
        if (!$header) {
            throw new ArchiveCorruptedException('Failed to parse header');
        }

        $return = array();
        $return['checksum'] = octdec(trim($header['checksum']));
        if ($return['checksum'] != $chks) {
            throw new ArchiveCorruptedException('Header does not match its checksum');
        }

        $return['filename'] = trim($header['filename']);
        $return['perm'] = octdec(trim($header['perm']));
        $return['uid'] = octdec(trim($header['uid']));
        $return['gid'] = octdec(trim($header['gid']));
        $return['size'] = octdec(trim($header['size']));
        $return['mtime'] = octdec(trim($header['mtime']));
        $return['typeflag'] = $header['typeflag'];
        $return['link'] = trim($header['link']);
        $return['uname'] = trim($header['uname']);
        $return['gname'] = trim($header['gname']);

        // Handle ustar Posix compliant path prefixes
        // (compare against '' explicitly, a directory named "0" is a valid prefix)
        $prefix = trim($header['prefix']);
        if ($prefix !== '') {
            $return['filename'] = $prefix.'/'.$return['filename'];
        }

        // Handle Long-Link entries from GNU Tar
        if ($return['typeflag'] == 'L') {
            // following data block(s) is the filename
            $filename = trim($this->readbytes(ceil($return['size'] / 512) * 512));
            // next block is the real header
            $block = $this->readbytes(512);
            $return = $this->parseHeader($block);
            if (!is_array($return)) {
                // a null block here means the entry the long name belongs to is missing
                throw new ArchiveCorruptedException('Long link entry is not followed by a valid header');
            }
            // overwrite the filename
            $return['filename'] = $filename;
        }

        return $return;
    }

    /**
     * Creates a FileInfo object from the given parsed header
     *
     * @param array $header a header as returned by parseHeader()
     * @return FileInfo
     */
    protected function header2fileinfo($header)
    {
        $fileinfo = new FileInfo();
        $fileinfo->setPath($header['filename']);
        $fileinfo->setMode($header['perm']);
        $fileinfo->setUid($header['uid']);
        $fileinfo->setGid($header['gid']);
        $fileinfo->setSize($header['size']);
        $fileinfo->setMtime($header['mtime']);
        $fileinfo->setOwner($header['uname']);
        $fileinfo->setGroup($header['gname']);
        // Only typeflag '5' marks a directory. A plain bool cast would also flag links,
        // extended headers and old style regular files ("\0") as directories.
        $fileinfo->setIsdir($header['typeflag'] === '5');

        return $fileinfo;
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype one of the Archive::COMPRESS_* constants
     * @throws ArchiveIllegalCompressionException
     */
    protected function compressioncheck($comptype)
    {
        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw new ArchiveIllegalCompressionException('No gzip support available');
        }

        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw new ArchiveIllegalCompressionException('No bzip2 support available');
        }
    }

    /**
     * Guesses the wanted compression from the given file
     *
     * Uses magic bytes for existing files, the file extension otherwise
     *
     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int one of the Archive::COMPRESS_* constants
     */
    public function filetype($file)
    {
        // for existing files, try to read the magic bytes
        if (file_exists($file) && is_readable($file) && filesize($file) > 5) {
            $fh = @fopen($file, 'rb');
            // when the file can not be opened we fall through to the file name check below
            if ($fh) {
                $magic = (string) fread($fh, 5);
                fclose($fh);

                if (strpos($magic, "\x42\x5a") === 0) {
                    return Archive::COMPRESS_BZIP;
                }
                if (strpos($magic, "\x1f\x8b") === 0) {
                    return Archive::COMPRESS_GZIP;
                }
            }
        }

        // otherwise rely on file name
        $file = strtolower($file);
        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            return Archive::COMPRESS_GZIP;
        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            return Archive::COMPRESS_BZIP;
        }

        return Archive::COMPRESS_NONE;
    }

}