<?php

namespace splitbrain\PHPArchive;

/**
 * Class Tar
 *
 * Creates or extracts Tar archives. Supports gz and bzip compression
 *
 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
 *
 * An archive is either opened for reading via open() or started for writing via create().
 * When create() is called without a file name, the archive is assembled in memory and can
 * be fetched with getArchive() or written with save().
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @package splitbrain\PHPArchive
 * @license MIT
 */
class Tar extends Archive
{

    /** @var string path of the archive file, empty for in-memory archives and after close() */
    protected $file = '';
    /** @var int compression type, one of the Archive::COMPRESS_* constants */
    protected $comptype = Archive::COMPRESS_AUTO;
    /** @var int compression level, -1 to 9 */
    protected $complevel = 9;
    /** @var resource|int handle of the opened archive file, 0 when no file is open */
    protected $fh;
    /** @var string raw (uncompressed) tar data of an in-memory archive */
    protected $memory = '';
    /** @var bool true while no archive is open for reading or writing */
    protected $closed = true;
    /** @var bool true when the archive was started with create() */
    protected $writeaccess = false;

    /**
     * Sets the compression to use
     *
     * A level of 0 switches the compression type to COMPRESS_NONE, and a type of
     * COMPRESS_NONE forces the level to 0.
     *
     * @param int $level Compression level (0 to 9, -1 is accepted as well)
     * @param int $type  Type of compression to use (use COMPRESS_* constants)
     * @throws ArchiveIllegalCompressionException
     */
    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
    {
        $this->compressioncheck($type);
        if ($level < -1 || $level > 9) {
            throw new ArchiveIllegalCompressionException('Compression level should be between -1 and 9');
        }
        $this->comptype  = $type;
        $this->complevel = $level;
        if ($level == 0) {
            $this->comptype = Archive::COMPRESS_NONE;
        }
        if ($type == Archive::COMPRESS_NONE) {
            $this->complevel = 0;
        }
    }

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @throws ArchiveIOException
     * @throws ArchiveIllegalCompressionException
     */
    public function open($file)
    {
        $this->file = $file;

        // update compression to match file
        if ($this->comptype == Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // open file handles
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if (!$this->fh) {
            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
        }
        $this->closed = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException
     * @return FileInfo[]
     */
    public function contents()
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $result = array();
        while ($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if (!is_array($header)) {
                // null block, nothing to list
                continue;
            }

            // jump over the entry's data, which is padded to full 512 byte blocks
            $this->skipbytes(ceil($header['size'] / 512) * 512);
            $result[] = $this->header2fileinfo($header);
        }

        $this->close();
        return $result;
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string     $outdir  the target directory for extracting
     * @param int|string $strip   either the number of path components or a fixed prefix to strip
     * @param string     $exclude a regular expression of files to exclude
     * @param string     $include a regular expression of files to include
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException
     * @return FileInfo[]
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '')
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $outdir = rtrim($outdir, '/');
        @mkdir($outdir, 0777, true);
        if (!is_dir($outdir)) {
            throw new ArchiveIOException("Could not create directory '$outdir'");
        }

        $extracted = array();
        while ($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if (!is_array($header)) {
                continue;
            }
            $fileinfo = $this->header2fileinfo($header);

            // apply strip rules
            $fileinfo->strip($strip);

            // skip unwanted files
            if (!strlen($fileinfo->getPath()) || !$fileinfo->matchExpression($include, $exclude)) {
                $this->skipbytes(ceil($header['size'] / 512) * 512);
                continue;
            }

            // create output directory
            $output    = $outdir.'/'.$fileinfo->getPath();
            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
            @mkdir($directory, 0777, true);

            // extract data
            if (!$fileinfo->getIsdir()) {
                $fp = @fopen($output, "wb");
                if (!$fp) {
                    throw new ArchiveIOException('Could not open file for writing: '.$output);
                }

                // copy all full blocks first
                $size = floor($header['size'] / 512);
                for ($i = 0; $i < $size; $i++) {
                    fwrite($fp, $this->readbytes(512), 512);
                }
                // the last block is padded, only write the bytes that belong to the file
                if (($header['size'] % 512) != 0) {
                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
                }

                fclose($fp);
                @touch($output, $fileinfo->getMtime());
                @chmod($output, $fileinfo->getMode());
            } else {
                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
            }

            if (is_callable($this->callback)) {
                call_user_func($this->callback, $fileinfo);
            }
            $extracted[] = $fileinfo;
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @throws ArchiveIOException
     * @throws ArchiveIllegalCompressionException
     */
    public function create($file = '')
    {
        $this->file   = $file;
        $this->memory = '';
        $this->fh     = 0;

        if ($this->file) {
            // determine compression
            if ($this->comptype == Archive::COMPRESS_AUTO) {
                $this->setCompression($this->complevel, $this->filetype($file));
            }

            if ($this->comptype === Archive::COMPRESS_GZIP) {
                // setCompression() accepts -1 as level; appending it would build the mode
                // string 'wb-1', so the level suffix is only added for the levels 0-9
                $mode = 'wb';
                if ($this->complevel >= 0) {
                    $mode .= $this->complevel;
                }
                $this->fh = @gzopen($this->file, $mode);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if (!$this->fh) {
                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
            }
        }
        $this->writeaccess = true;
        $this->closed      = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @param string          $file     path to the original file
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with all meta data, empty to take from original
     * @throws ArchiveCorruptedException when the file changes while reading it, the archive will be corrupt and should be deleted
     * @throws ArchiveIOException there was trouble reading the given file, it was not added
     * @throws FileInfoException trouble reading file info, it was not added
     */
    public function addFile($file, $fileinfo = '')
    {
        if (is_string($fileinfo)) {
            $fileinfo = FileInfo::fromPath($file, $fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $fp = @fopen($file, 'rb');
        if (!$fp) {
            throw new ArchiveIOException('Could not open file for reading: '.$file);
        }

        // create file header
        $this->writeFileHeader($fileinfo);

        // write data in 512 byte blocks, the last block is padded with null bytes
        $read = 0;
        while (!feof($fp)) {
            $data = fread($fp, 512);
            // check for a read failure or an empty read before the result is used
            if ($data === false || $data === '') {
                break;
            }
            $read += strlen($data);
            $this->writebytes(pack("a512", $data));
        }
        fclose($fp);

        // the header already announced the size, a mismatch makes the archive unreadable
        if ($read != $fileinfo->getSize()) {
            $this->close();
            throw new ArchiveCorruptedException("The size of $file changed while reading, archive corrupted. read $read expected ".$fileinfo->getSize());
        }

        if (is_callable($this->callback)) {
            call_user_func($this->callback, $fileinfo);
        }
    }

    /**
     * Add a file to the current TAR archive using the given $data as content
     *
     * The size stored in the given FileInfo is overwritten with the length of $data.
     *
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with all meta data
     * @param string          $data     binary content of the file to add
     * @throws ArchiveIOException
     */
    public function addData($fileinfo, $data)
    {
        if (is_string($fileinfo)) {
            $fileinfo = new FileInfo($fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $len = strlen($data);
        $fileinfo->setSize($len);
        $this->writeFileHeader($fileinfo);

        // write data in 512 byte blocks, the last block is padded with null bytes
        for ($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }

        if (is_callable($this->callback)) {
            call_user_func($this->callback, $fileinfo);
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     * @throws ArchiveIOException
     */
    public function close()
    {
        if ($this->closed) {
            return;
        } // we did this already

        // write footer
        if ($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles (in-memory archives have none)
        if ($this->file) {
            if ($this->comptype === Archive::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh   = 0;
        }

        $this->writeaccess = false;
        $this->closed      = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive. The data is compressed according to the
     * configured compression type, COMPRESS_AUTO is treated as COMPRESS_NONE here.
     *
     * @return string
     * @throws ArchiveIOException
     */
    public function getArchive()
    {
        $this->close();

        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->comptype = Archive::COMPRESS_NONE;
        }

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return gzencode($this->memory, $this->complevel);
        }
        if ($this->comptype === Archive::COMPRESS_BZIP) {
            return bzcompress($this->memory);
        }
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory effective to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @throws ArchiveIOException
     * @throws ArchiveIllegalCompressionException
     */
    public function save($file)
    {
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        if (!@file_put_contents($file, $this->getArchive())) {
            throw new ArchiveIOException('Could not write to file: '.$file);
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string
     */
    protected function readbytes($length)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws ArchiveIOException
     * @return int number of bytes written
     */
    protected function writebytes($data)
    {
        if (!$this->file) {
            // no file given, the archive is collected in memory
            $this->memory .= $data;
            $written = strlen($data);
        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if ($written === false) {
            throw new ArchiveIOException('Failed to write to archive stream');
        }
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip, relative to the current position
     */
    protected function skipbytes($bytes)
    {
        // callers pass the float result of ceil(), normalise it to an integer byte count
        $bytes = (int) $bytes;

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on
            // bzread allows to read a max of 8kb at once
            while ($bytes > 0) {
                $toread = min(8192, $bytes);
                @bzread($this->fh, $toread);
                $bytes -= $toread;
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write the given file meta data as header
     *
     * @param FileInfo $fileinfo
     * @throws ArchiveIOException
     */
    protected function writeFileHeader(FileInfo $fileinfo)
    {
        $this->writeRawFileHeader(
            $fileinfo->getPath(),
            $fileinfo->getUid(),
            $fileinfo->getGid(),
            $fileinfo->getMode(),
            $fileinfo->getSize(),
            $fileinfo->getMtime(),
            $fileinfo->getIsdir() ? '5' : '0'
        );
    }

    /**
     * Write a file header to the stream
     *
     * Names longer than 100 bytes are split into ustar prefix and name where possible. Otherwise
     * a GNU '././@LongLink' entry carrying the full name is written in front of the header and
     * the name in the header itself is cut off at 100 bytes.
     *
     * @param string $name
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $size
     * @param int    $mtime
     * @param string $typeflag Set to '5' for directories
     * @throws ArchiveIOException
     */
    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
    {
        // handle filename length restrictions
        $prefix  = '';
        $namelen = strlen($name);
        if ($namelen > 100) {
            $file = basename($name);
            $dir  = dirname($name);
            if (strlen($file) > 100 || strlen($dir) > 155) {
                // we're still too large, let's use GNU longlink
                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
                for ($s = 0; $s < $namelen; $s += 512) {
                    $this->writebytes(pack("a512", substr($name, $s, 512)));
                }
                $name = substr($name, 0, 100); // cut off name
            } else {
                // we're fine when splitting, use POSIX ustar
                $prefix = $dir;
                $name   = $file;
            }
        }

        // values are needed in octal
        $uid   = sprintf("%6s ", decoct($uid));
        $gid   = sprintf("%6s ", decoct($gid));
        $perm  = sprintf("%6s ", decoct($perm));
        $size  = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));

        // the 8 byte checksum field sits between these two parts (148 + 8 + 356 = 512 bytes)
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");

        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($data_first[$i]);
        }

        // 256 = 8 * 32: the checksum field itself is counted as eight spaces
        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
            $chks += ord($data_last[$j]);
        }

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * GNU long name entries (type flag 'L') are resolved here: the name is read from the
     * following data block(s) and applied to the header that comes after them.
     *
     * @param string $block a 512 byte block containing the header data
     * @return array|false returns false when this was a null block
     * @throws ArchiveCorruptedException
     */
    protected function parseHeader($block)
    {
        if (!$block || strlen($block) != 512) {
            throw new ArchiveCorruptedException('Unexpected length of header');
        }

        // null byte blocks are ignored
        if (trim($block) === '') {
            return false;
        }

        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($block[$i]);
        }

        // bytes 148-155 hold the checksum itself and are counted as eight spaces (8 * 32 = 256)
        for ($i = 156, $chks += 256; $i < 512; $i++) {
            $chks += ord($block[$i]);
        }

        $header = @unpack(
            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
            $block
        );
        if (!$header) {
            throw new ArchiveCorruptedException('Failed to parse header');
        }

        $return = array();
        $return['checksum'] = OctDec(trim($header['checksum']));
        if ($return['checksum'] != $chks) {
            throw new ArchiveCorruptedException('Header does not match its checksum');
        }

        $return['filename'] = trim($header['filename']);
        $return['perm']     = OctDec(trim($header['perm']));
        $return['uid']      = OctDec(trim($header['uid']));
        $return['gid']      = OctDec(trim($header['gid']));
        $return['size']     = OctDec(trim($header['size']));
        $return['mtime']    = OctDec(trim($header['mtime']));
        $return['typeflag'] = $header['typeflag'];
        $return['link']     = trim($header['link']);
        $return['uname']    = trim($header['uname']);
        $return['gname']    = trim($header['gname']);

        // Handle ustar Posix compliant path prefixes
        if (trim($header['prefix'])) {
            $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
        }

        // Handle Long-Link entries from GNU Tar
        if ($return['typeflag'] == 'L') {
            // following data block(s) is the filename
            $filename = trim($this->readbytes((int) (ceil($return['size'] / 512) * 512)));
            // next block is the real header
            $block  = $this->readbytes(512);
            $return = $this->parseHeader($block);
            if (!is_array($return)) {
                // a null block here would otherwise be turned into a header that only has a filename
                throw new ArchiveCorruptedException('Long name entry is not followed by a valid header');
            }
            // overwrite the filename
            $return['filename'] = $filename;
        }

        return $return;
    }

    /**
     * Creates a FileInfo object from the given parsed header
     *
     * @param array $header a header as returned by parseHeader()
     * @return FileInfo
     */
    protected function header2fileinfo($header)
    {
        $typeflag = $header['typeflag'];
        if ($typeflag === '' || $typeflag === "\0") {
            // A NUL type flag marks a regular file in pre-POSIX archives, where directories are
            // only recognisable by a trailing slash. Casting "\0" to bool would yield true and
            // turn every such file into a directory.
            $isdir = (substr($header['filename'], -1) === '/');
        } else {
            // '0' is a regular file; every other flag keeps the historic handling as non-file entry
            $isdir = (bool) $typeflag;
        }

        $fileinfo = new FileInfo();
        $fileinfo->setPath($header['filename']);
        $fileinfo->setMode($header['perm']);
        $fileinfo->setUid($header['uid']);
        $fileinfo->setGid($header['gid']);
        $fileinfo->setSize($header['size']);
        $fileinfo->setMtime($header['mtime']);
        $fileinfo->setOwner($header['uname']);
        $fileinfo->setGroup($header['gname']);
        $fileinfo->setIsdir($isdir);

        return $fileinfo;
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype one of the Archive::COMPRESS_* constants
     * @throws ArchiveIllegalCompressionException
     */
    protected function compressioncheck($comptype)
    {
        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw new ArchiveIllegalCompressionException('No gzip support available');
        }

        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw new ArchiveIllegalCompressionException('No bzip2 support available');
        }
    }

    /**
     * Guesses the wanted compression from the given file
     *
     * Uses magic bytes for existing files, the file extension otherwise
     *
     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int one of the Archive::COMPRESS_* constants
     */
    public function filetype($file)
    {
        // for existing files, try to read the magic bytes
        if (file_exists($file) && is_readable($file) && filesize($file) > 5) {
            $fh = @fopen($file, 'rb');
            // when the file can not be opened we fall through to the file name check below
            if ($fh) {
                $magic = (string) fread($fh, 5);
                fclose($fh);

                if (strpos($magic, "\x42\x5a") === 0) {
                    return Archive::COMPRESS_BZIP;
                }
                if (strpos($magic, "\x1f\x8b") === 0) {
                    return Archive::COMPRESS_GZIP;
                }
            }
        }

        // otherwise rely on file name
        $file = strtolower($file);
        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            return Archive::COMPRESS_GZIP;
        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            return Archive::COMPRESS_BZIP;
        }

        return Archive::COMPRESS_NONE;
    }

}