<?php

namespace splitbrain\PHPArchive;

/**
 * Class Tar
 *
 * Creates or extracts Tar archives. Supports gz and bzip compression
 *
 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @package splitbrain\PHPArchive
 * @license MIT
 */
class Tar extends Archive
{

    /** @var string path of the archive file, empty when the archive is built in memory */
    protected $file = '';
    /** @var int one of the Archive::COMPRESS_* constants */
    protected $comptype = Archive::COMPRESS_AUTO;
    /** @var int compression level (0 to 9) */
    protected $complevel = 9;
    /** @var resource|int handle of the opened archive file, 0 when none is open */
    protected $fh;
    /** @var string uncompressed archive data collected when no file name was given to create() */
    protected $memory = '';
    /** @var bool true until open() or create() was called, and again after close() */
    protected $closed = true;
    /** @var bool true when the archive was opened through create() */
    protected $writeaccess = false;

    /**
     * Sets the compression to use
     *
     * A level of 0 forces COMPRESS_NONE, and COMPRESS_NONE forces a level of 0.
     *
     * @param int $level Compression level (0 to 9)
     * @param int $type  Type of compression to use (use COMPRESS_* constants)
     * @throws ArchiveIllegalCompressionException when the needed PHP extension is missing
     * @return void
     */
    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
    {
        $this->compressioncheck($type);
        $this->comptype  = $type;
        $this->complevel = $level;
        if ($level == 0) {
            $this->comptype = Archive::COMPRESS_NONE;
        }
        if ($type == Archive::COMPRESS_NONE) {
            $this->complevel = 0;
        }
    }

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function open($file)
    {
        $this->file = $file;

        // update compression to match file
        if ($this->comptype == Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // open file handles
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if (!$this->fh) {
            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
        }
        $this->closed = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException when a header block can not be parsed
     * @return FileInfo[]
     */
    public function contents()
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $result = array();
        while ($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if (!is_array($header)) {
                continue; // null block
            }

            // file data is stored in full 512 byte blocks, jump over it
            $this->skipbytes(ceil($header['size'] / 512) * 512);
            $result[] = $this->header2fileinfo($header);
        }

        $this->close();
        return $result;
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string     $outdir  the target directory for extracting
     * @param int|string $strip   either the number of path components or a fixed prefix to strip
     * @param string     $exclude a regular expression of files to exclude
     * @param string     $include a regular expression of files to include
     * @throws ArchiveIOException
     * @throws ArchiveCorruptedException when a header block can not be parsed
     * @return FileInfo[]
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '')
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $outdir = rtrim($outdir, '/');
        @mkdir($outdir, 0777, true);
        if (!is_dir($outdir)) {
            throw new ArchiveIOException("Could not create directory '$outdir'");
        }

        $extracted = array();
        while ($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if (!is_array($header)) {
                continue; // null block
            }
            $fileinfo = $this->header2fileinfo($header);

            // apply strip rules
            $fileinfo->strip($strip);

            // skip unwanted files
            if (!strlen($fileinfo->getPath()) || !$fileinfo->match($include, $exclude)) {
                $this->skipbytes(ceil($header['size'] / 512) * 512);
                continue;
            }

            // create output directory
            // NOTE(review): the path stored in the archive is appended to $outdir as returned by FileInfo;
            // presumably FileInfo removes '..' components - confirm, otherwise entries could be written
            // outside of $outdir
            $output    = $outdir.'/'.$fileinfo->getPath();
            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
            @mkdir($directory, 0777, true);

            // extract data
            if (!$fileinfo->getIsdir()) {
                $fp = @fopen($output, "wb");
                if (!$fp) {
                    throw new ArchiveIOException('Could not open file for writing: '.$output);
                }

                // copy all complete 512 byte blocks
                $size = floor($header['size'] / 512);
                for ($i = 0; $i < $size; $i++) {
                    fwrite($fp, $this->readbytes(512), 512);
                }
                // the last block is zero padded, only write the real payload
                if (($header['size'] % 512) != 0) {
                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
                }

                fclose($fp);
                touch($output, $fileinfo->getMtime());
                chmod($output, $fileinfo->getMode());
            } else {
                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
            }

            $extracted[] = $fileinfo;
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function create($file = '')
    {
        $this->file   = $file;
        $this->memory = '';
        $this->fh     = 0;

        if ($this->file) {
            // determine compression
            if ($this->comptype == Archive::COMPRESS_AUTO) {
                $this->setCompression($this->complevel, $this->filetype($file));
            }

            if ($this->comptype === Archive::COMPRESS_GZIP) {
                $this->fh = @gzopen($this->file, 'wb'.$this->complevel);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if (!$this->fh) {
                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
            }
        }
        $this->writeaccess = true;
        $this->closed      = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @param string          $file     path to the original file
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with
     *                                  all meta data, empty to take from original
     * @throws ArchiveCorruptedException when the file changes while reading it, the archive will be corrupt
     *                                   and should be deleted
     * @throws ArchiveIOException there was trouble reading the given file, it was not added
     */
    public function addFile($file, $fileinfo = '')
    {
        if (is_string($fileinfo)) {
            $fileinfo = FileInfo::fromPath($file, $fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $fp = @fopen($file, 'rb');
        if (!$fp) {
            throw new ArchiveIOException('Could not open file for reading: '.$file);
        }

        // create file header
        $this->writeFileHeader($fileinfo);

        // write data in zero padded 512 byte blocks
        $read = 0;
        while (!feof($fp)) {
            $data = fread($fp, 512);
            if ($data === false || $data === '') {
                break;
            }
            $read += strlen($data);
            $this->writebytes(pack("a512", $data));
        }
        fclose($fp);

        // the header already announced the size, a mismatch makes the archive unusable
        if ($read != $fileinfo->getSize()) {
            $this->close();
            throw new ArchiveCorruptedException("The size of $file changed while reading, archive corrupted. read $read expected ".$fileinfo->getSize());
        }
    }

    /**
     * Add a file to the current TAR archive using the given $data as content
     *
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with
     *                                  all meta data
     * @param string          $data     binary content of the file to add
     * @throws ArchiveIOException
     */
    public function addData($fileinfo, $data)
    {
        if (is_string($fileinfo)) {
            $fileinfo = new FileInfo($fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $len = strlen($data);
        $fileinfo->setSize($len);
        $this->writeFileHeader($fileinfo);

        // write data in zero padded 512 byte blocks
        for ($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     * @throws ArchiveIOException when the footer can not be written
     */
    public function close()
    {
        if ($this->closed) {
            return; // we did this already
        }

        // write footer
        if ($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles
        if ($this->file) {
            if ($this->comptype === Archive::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh   = 0;
        }

        $this->writeaccess = false;
        $this->closed      = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive
     *
     * @return string the archive data, compressed according to the configured compression type
     */
    public function getArchive()
    {
        $this->close();

        // nothing to guess the compression from, so AUTO means no compression here
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->comptype = Archive::COMPRESS_NONE;
        }

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return gzencode($this->memory, $this->complevel);
        }
        if ($this->comptype === Archive::COMPRESS_BZIP) {
            return bzcompress($this->memory);
        }
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory effective to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function save($file)
    {
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // a result of 0 bytes is treated as failure as well
        if (!@file_put_contents($file, $this->getArchive())) {
            throw new ArchiveIOException('Could not write to file: '.$file);
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string
     */
    protected function readbytes($length)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws ArchiveIOException
     * @return int number of bytes written
     */
    protected function writebytes($data)
    {
        if (!$this->file) {
            $this->memory .= $data;
            $written = strlen($data);
        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if ($written === false) {
            throw new ArchiveIOException('Failed to write to archive stream');
        }
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip, relative to the current position
     */
    protected function skipbytes($bytes)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on
            // bzread allows to read a max of 8kb at once
            while ($bytes > 0) {
                $chunk = @bzread($this->fh, (int) min(8192, $bytes));
                if ($chunk === false || $chunk === '') {
                    break; // read error or premature end of the stream
                }
                // count what was really read, a read may return less than requested
                $bytes -= strlen($chunk);
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write the given file meta data as header
     *
     * @param FileInfo $fileinfo
     * @throws ArchiveIOException
     */
    protected function writeFileHeader(FileInfo $fileinfo)
    {
        $this->writeRawFileHeader(
            $fileinfo->getPath(),
            $fileinfo->getUid(),
            $fileinfo->getGid(),
            $fileinfo->getMode(),
            $fileinfo->getSize(),
            $fileinfo->getMtime(),
            $fileinfo->getIsdir() ? '5' : '0'
        );
    }

    /**
     * Write a file header to the stream
     *
     * Names longer than 100 bytes are split into the ustar prefix and name fields when possible, otherwise
     * a GNU longlink entry carrying the full name is written in front of the (truncated) header.
     *
     * @param string $name
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $size
     * @param int    $mtime
     * @param string $typeflag Set to '5' for directories
     * @throws ArchiveIOException
     */
    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
    {
        // handle filename length restrictions
        $prefix  = '';
        $namelen = strlen($name);
        if ($namelen > 100) {
            $file = basename($name);
            $dir  = dirname($name);
            if (strlen($file) > 100 || strlen($dir) > 155) {
                // we're still too large, let's use GNU longlink
                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
                for ($s = 0; $s < $namelen; $s += 512) {
                    $this->writebytes(pack("a512", substr($name, $s, 512)));
                }
                $name = substr($name, 0, 100); // cut off name
            } else {
                // we're fine when splitting, use POSIX ustar
                $prefix = $dir;
                $name   = $file;
            }
        }

        // values are needed in octal
        $uid   = sprintf("%6s ", decoct($uid));
        $gid   = sprintf("%6s ", decoct($gid));
        $perm  = sprintf("%6s ", decoct($perm));
        $size  = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));

        // the header is built in two parts around the 8 byte checksum field at offset 148
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");

        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($data_first[$i]);
        }

        // the checksum field itself counts as 8 spaces (8 * 32 = 256)
        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
            $chks += ord($data_last[$j]);
        }

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * For GNU longlink entries the following name block(s) and the real header are read from the
     * stream as well, the returned header is the real one with the long file name applied.
     *
     * @param string $block a 512 byte block containing the header data
     * @return array|false returns false when this was a null block
     * @throws ArchiveCorruptedException
     */
    protected function parseHeader($block)
    {
        if (!$block || strlen($block) != 512) {
            throw new ArchiveCorruptedException('Unexpected length of header');
        }

        // null byte blocks are ignored
        if (trim($block) === '') {
            return false;
        }

        for ($i = 0, $chks = 0; $i < 148; $i++) {
            $chks += ord($block[$i]);
        }

        // the checksum field itself counts as 8 spaces (8 * 32 = 256)
        for ($i = 156, $chks += 256; $i < 512; $i++) {
            $chks += ord($block[$i]);
        }

        $header = @unpack(
            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
            $block
        );
        if (!$header) {
            throw new ArchiveCorruptedException('Failed to parse header');
        }

        $return = array();

        $return['checksum'] = OctDec(trim($header['checksum']));
        if ($return['checksum'] != $chks) {
            throw new ArchiveCorruptedException('Header does not match it\'s checksum');
        }

        $return['filename'] = trim($header['filename']);
        $return['perm']     = OctDec(trim($header['perm']));
        $return['uid']      = OctDec(trim($header['uid']));
        $return['gid']      = OctDec(trim($header['gid']));
        $return['size']     = OctDec(trim($header['size']));
        $return['mtime']    = OctDec(trim($header['mtime']));
        $return['typeflag'] = $header['typeflag'];
        $return['link']     = trim($header['link']);
        $return['uname']    = trim($header['uname']);
        $return['gname']    = trim($header['gname']);

        // Handle ustar Posix compliant path prefixes
        if (trim($header['prefix'])) {
            $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
        }

        // Handle Long-Link entries from GNU Tar
        if ($return['typeflag'] == 'L') {
            // following data block(s) is the filename
            $filename = trim($this->readbytes(ceil($return['size'] / 512) * 512));
            // next block is the real header
            $block  = $this->readbytes(512);
            $return = $this->parseHeader($block);
            if (!is_array($return)) {
                // a null block here means the entry the long name belongs to is missing
                throw new ArchiveCorruptedException('Long-Link entry is not followed by a file header');
            }
            // overwrite the filename
            $return['filename'] = $filename;
        }

        return $return;
    }

    /**
     * Creates a FileInfo object from the given parsed header
     *
     * An entry is a directory when its typeflag is '5'. For backward compatibility with pre-POSIX
     * archives, a regular file entry (typeflag '0' or NUL) whose name ends with a slash is treated
     * as a directory, too.
     *
     * @param array $header a header as returned by parseHeader()
     * @return FileInfo
     */
    protected function header2fileinfo($header)
    {
        $typeflag = $header['typeflag'];
        $isdir    = ($typeflag === '5');
        // unpack() returns a NUL typeflag either as "\0" or as empty string, depending on the PHP version
        if (!$isdir && ($typeflag === '0' || $typeflag === '' || $typeflag === "\0")) {
            $isdir = (substr($header['filename'], -1) === '/');
        }

        $fileinfo = new FileInfo();
        $fileinfo->setPath($header['filename']);
        $fileinfo->setMode($header['perm']);
        $fileinfo->setUid($header['uid']);
        $fileinfo->setGid($header['gid']);
        $fileinfo->setSize($header['size']);
        $fileinfo->setMtime($header['mtime']);
        $fileinfo->setOwner($header['uname']);
        $fileinfo->setGroup($header['gname']);
        $fileinfo->setIsdir($isdir);

        return $fileinfo;
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype one of the Archive::COMPRESS_* constants
     * @throws ArchiveIllegalCompressionException
     */
    protected function compressioncheck($comptype)
    {
        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw new ArchiveIllegalCompressionException('No gzip support available');
        }

        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw new ArchiveIllegalCompressionException('No bzip2 support available');
        }
    }

    /**
     * Guesses the wanted compression from the given file
     *
     * Uses magic bytes for existing files, the file extension otherwise
     *
     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int one of Archive::COMPRESS_GZIP, Archive::COMPRESS_BZIP or Archive::COMPRESS_NONE
     */
    public function filetype($file)
    {
        // for existing files, try to read the magic bytes
        if (file_exists($file) && is_readable($file) && filesize($file) > 5) {
            $fh = @fopen($file, 'rb');
            // when the file can not be opened we fall through to the file name check below
            if ($fh) {
                $magic = (string) fread($fh, 5);
                fclose($fh);

                if (strpos($magic, "\x42\x5a") === 0) {
                    return Archive::COMPRESS_BZIP;
                }
                if (strpos($magic, "\x1f\x8b") === 0) {
                    return Archive::COMPRESS_GZIP;
                }
            }
        }

        // otherwise rely on file name
        $file = strtolower($file);
        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            return Archive::COMPRESS_GZIP;
        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            return Archive::COMPRESS_BZIP;
        }

        return Archive::COMPRESS_NONE;
    }
}