<?php

namespace splitbrain\PHPArchive;

/**
 * Class Tar
 *
 * Creates or extracts Tar archives. Supports gz and bzip compression
 *
 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @package splitbrain\PHPArchive
 * @license MIT
 */
class Tar extends Archive
{

    /** @var string path of the archive file, empty when working in memory or when closed */
    protected $file = '';
    /** @var int one of the Archive::COMPRESS_* constants */
    protected $comptype = Archive::COMPRESS_AUTO;
    /** @var int compression level (0 to 9) */
    protected $complevel = 9;
    /** @var resource|int the open stream handle, 0 when no file is open */
    protected $fh;
    /** @var string the raw (uncompressed) archive data when creating an archive in memory */
    protected $memory = '';
    /** @var bool true when no archive is currently open */
    protected $closed = true;
    /** @var bool true when the archive was opened through create() */
    protected $writeaccess = false;

    /**
     * Sets the compression to use
     *
     * @param int $level Compression level (0 to 9)
     * @param int $type  Type of compression to use (use COMPRESS_* constants)
     * @throws ArchiveIllegalCompressionException when the needed PHP extension is missing
     */
    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
    {
        $this->compressioncheck($type);
        $this->comptype  = $type;
        $this->complevel = $level;
    }

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function open($file)
    {
        $this->file = $file;

        // update compression to match file
        if ($this->comptype == Archive::COMPRESS_AUTO) {
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        // open file handles
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if (!$this->fh) {
            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
        }
        $this->closed = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @throws ArchiveIOException
     * @return FileInfo[]
     */
    public function contents()
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $result = array();
        while ($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if (!is_array($header)) {
                // not a valid header block (e.g. the zero filled end-of-archive blocks)
                continue;
            }

            // file data is stored in full 512 byte blocks, jump over it
            $this->skipbytes(ceil($header['size'] / 512) * 512);
            $result[] = $this->header2fileinfo($header);
        }

        $this->close();
        return $result;
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * Entries whose (stripped) path contains a ".." component are refused with an exception, because they
     * would be written outside of $outdir.
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string     $outdir  the target directory for extracting
     * @param int|string $strip   either the number of path components or a fixed prefix to strip
     * @param string     $exclude a regular expression of files to exclude
     * @param string     $include a regular expression of files to include
     * @throws ArchiveIOException
     * @return FileInfo[]
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '')
    {
        if ($this->closed || !$this->file) {
            throw new ArchiveIOException('Can not read from a closed archive');
        }

        $outdir = rtrim($outdir, '/');
        @mkdir($outdir, 0777, true);
        if (!is_dir($outdir)) {
            throw new ArchiveIOException("Could not create directory '$outdir'");
        }

        $extracted = array();
        while ($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if (!is_array($header)) {
                continue;
            }
            $fileinfo = $this->header2fileinfo($header);

            // apply strip rules
            $fileinfo->strip($strip);

            // skip unwanted files
            if (!strlen($fileinfo->getPath()) || !$fileinfo->match($include, $exclude)) {
                $this->skipbytes(ceil($header['size'] / 512) * 512);
                continue;
            }

            // The path comes straight from the (untrusted) archive. Refuse anything that would climb
            // out of the target directory. NOTE(review): FileInfo may already normalise paths; it is
            // not visible from this file, so this check is kept as a defensive measure.
            $segments = explode('/', str_replace('\\', '/', $fileinfo->getPath()));
            if (in_array('..', $segments, true)) {
                throw new ArchiveIOException('Refusing to extract file with unsafe path: '.$fileinfo->getPath());
            }

            // create output directory
            $output    = $outdir.'/'.$fileinfo->getPath();
            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
            @mkdir($directory, 0777, true);

            // extract data
            if (!$fileinfo->getIsdir()) {
                $fp = fopen($output, "wb");
                if (!$fp) {
                    throw new ArchiveIOException('Could not open file for writing: '.$output);
                }

                // copy all completely filled blocks
                $size = floor($header['size'] / 512);
                for ($i = 0; $i < $size; $i++) {
                    fwrite($fp, $this->readbytes(512), 512);
                }
                // the last block is padded in the archive, only write the real data
                if (($header['size'] % 512) != 0) {
                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
                }

                fclose($fp);
                touch($output, $fileinfo->getMtime());
                chmod($output, $fileinfo->getMode());
            } else {
                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
            }

            $extracted[] = $fileinfo;
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function create($file = '')
    {
        $this->file   = $file;
        $this->memory = '';
        $this->fh     = 0;

        if ($this->file) {
            // determine compression
            if ($this->comptype == Archive::COMPRESS_AUTO) {
                $this->setCompression($this->complevel, $this->filetype($file));
            }

            if ($this->comptype === Archive::COMPRESS_GZIP) {
                $this->fh = @gzopen($this->file, 'wb'.$this->complevel);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if (!$this->fh) {
                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
            }
        }
        $this->writeaccess = true;
        $this->closed      = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @param string          $file     path to the original file
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with
     *                                  all meta data, empty to take from original
     * @throws ArchiveIOException
     */
    public function addFile($file, $fileinfo = '')
    {
        if (is_string($fileinfo)) {
            $fileinfo = FileInfo::fromPath($file, $fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $fp = fopen($file, 'rb');
        if (!$fp) {
            throw new ArchiveIOException('Could not open file for reading: '.$file);
        }

        // create file header
        $this->writeFileHeader($fileinfo);

        // write data, each chunk is NUL padded to a full 512 byte block
        while (!feof($fp)) {
            $data = fread($fp, 512);
            if ($data === false || $data === '') {
                break;
            }
            $this->writebytes(pack("a512", $data));
        }
        fclose($fp);
    }

    /**
     * Add a file to the current TAR archive using the given $data as content
     *
     * @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with
     *                                  all meta data
     * @param string          $data     binary content of the file to add
     * @throws ArchiveIOException
     */
    public function addData($fileinfo, $data)
    {
        if (is_string($fileinfo)) {
            $fileinfo = new FileInfo($fileinfo);
        }

        if ($this->closed) {
            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
        }

        $len = strlen($data);
        $fileinfo->setSize($len);
        $this->writeFileHeader($fileinfo);

        // write the data in NUL padded 512 byte blocks
        for ($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     */
    public function close()
    {
        if ($this->closed) {
            return;
        } // we did this already

        // write footer
        if ($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles
        if ($this->file) {
            if ($this->comptype === Archive::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh   = 0;
        }

        $this->writeaccess = false;
        $this->closed      = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive
     *
     * @return string the archive data, compressed according to the configured compression type
     */
    public function getArchive()
    {
        $this->close();

        if ($this->comptype === Archive::COMPRESS_AUTO) {
            $this->comptype = Archive::COMPRESS_NONE;
        }

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            // gzencode() creates a real gzip file (header + CRC) as expected for .tgz/.tar.gz,
            // gzcompress() would only create a bare zlib stream that gzopen() and tar can not read
            return gzencode($this->memory, $this->complevel);
        }
        if ($this->comptype === Archive::COMPRESS_BZIP) {
            return bzcompress($this->memory);
        }
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory effective to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @throws ArchiveIOException
     */
    public function save($file)
    {
        if ($this->comptype === Archive::COMPRESS_AUTO) {
            // keep the configured level, guess the type from the target file name
            $this->setCompression($this->complevel, $this->filetype($file));
        }

        if (!file_put_contents($file, $this->getArchive())) {
            throw new ArchiveIOException('Could not write to file: '.$file);
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string
     */
    protected function readbytes($length)
    {
        if ($this->comptype === Archive::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws ArchiveIOException
     * @return int number of bytes written
     */
    protected function writebytes($data)
    {
        if (!$this->file) {
            // no file given, the archive is built up in memory
            $this->memory .= $data;
            $written = strlen($data);
        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if ($written === false) {
            throw new ArchiveIOException('Failed to write to archive stream');
        }
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip, relative to the current position
     */
    public function skipbytes($bytes)
    {
        // callers pass the float result of ceil(), the stream functions want an integer
        $bytes = (int) $bytes;

        if ($this->comptype === Archive::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on
            // bzread() returns at most 8192 bytes per call, so larger skips need multiple reads
            while ($bytes > 0) {
                $read = @bzread($this->fh, min(8192, $bytes));
                if ($read === false || $read === '') {
                    break; // error or end of stream, nothing more to skip
                }
                $bytes -= strlen($read);
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write the given file meta data as header
     *
     * @param FileInfo $fileinfo
     * @throws ArchiveIOException
     */
    protected function writeFileHeader(FileInfo $fileinfo)
    {
        $this->writeRawFileHeader(
            $fileinfo->getPath(),
            $fileinfo->getUid(),
            $fileinfo->getGid(),
            $fileinfo->getMode(),
            $fileinfo->getSize(),
            $fileinfo->getMtime(),
            $fileinfo->getIsdir() ? '5' : '0'
        );
    }

    /**
     * Write a file header to the stream
     *
     * Names longer than 100 bytes are either split into the ustar prefix and name fields or, if that
     * is not possible, announced through a preceding GNU '././@LongLink' entry.
     *
     * @param string $name
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $size
     * @param int    $mtime
     * @param string $typeflag Set to '5' for directories
     * @throws ArchiveIOException
     */
    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
    {
        // handle filename length restrictions
        $prefix  = '';
        $namelen = strlen($name);
        if ($namelen > 100) {
            $file = basename($name);
            $dir  = dirname($name);
            if (strlen($file) > 100 || strlen($dir) > 155) {
                // we're still too large, let's use GNU longlink
                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
                for ($s = 0; $s < $namelen; $s += 512) {
                    $this->writebytes(pack("a512", substr($name, $s, 512)));
                }
                $name = substr($name, 0, 100); // cut off name
            } else {
                // we're fine when splitting, use POSIX ustar
                $prefix = $dir;
                $name   = $file;
            }
        }

        // values are needed in octal
        $uid   = sprintf("%6s ", decoct($uid));
        $gid   = sprintf("%6s ", decoct($gid));
        $perm  = sprintf("%6s ", decoct($perm));
        $size  = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));

        // bytes 0-147 (everything before the checksum) and bytes 156-511 (everything after it)
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");

        // the checksum is the sum of all header bytes, the 8 byte checksum field itself
        // is counted as if it was filled with spaces (8 * 32 = 256)
        $chks = 256;
        for ($i = 0; $i < 148; $i++) {
            $chks += ord($data_first[$i]);
        }
        for ($j = 0; $j < 356; $j++) {
            $chks += ord($data_last[$j]);
        }

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * For GNU LongLink entries the following name block(s) and the real header are read from the
     * stream as well, so the returned data always describes the real entry.
     *
     * @param string $block a 512 byte block containing the header data
     * @return array|bool the decoded header fields or false if the block is no valid header
     */
    protected function parseHeader($block)
    {
        if (!$block || strlen($block) != 512) {
            return false;
        }

        // calculate the checksum of the block, the checksum field itself (bytes 148-155)
        // is counted as 8 spaces (8 * 32 = 256)
        $chks = 256;
        for ($i = 0; $i < 148; $i++) {
            $chks += ord($block[$i]);
        }
        for ($i = 156; $i < 512; $i++) {
            $chks += ord($block[$i]);
        }

        $header = @unpack(
            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
            $block
        );
        if (!$header) {
            return false;
        }

        $return = array();

        $return['checksum'] = octdec(trim($header['checksum']));
        if ($return['checksum'] != $chks) {
            return false;
        }

        // numeric fields are stored as octal text
        $return['filename'] = trim($header['filename']);
        $return['perm']     = octdec(trim($header['perm']));
        $return['uid']      = octdec(trim($header['uid']));
        $return['gid']      = octdec(trim($header['gid']));
        $return['size']     = octdec(trim($header['size']));
        $return['mtime']    = octdec(trim($header['mtime']));
        $return['typeflag'] = $header['typeflag'];
        $return['link']     = trim($header['link']);
        $return['uname']    = trim($header['uname']);
        $return['gname']    = trim($header['gname']);

        // Handle ustar Posix compliant path prefixes
        if (trim($header['prefix'])) {
            $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
        }

        // Handle Long-Link entries from GNU Tar
        if ($return['typeflag'] == 'L') {
            // following data block(s) is the filename, its length is the decoded (not the raw octal) size
            $filename = trim($this->readbytes((int) (ceil($return['size'] / 512) * 512)));
            // next block is the real header
            $block  = $this->readbytes(512);
            $return = $this->parseHeader($block);
            if (!is_array($return)) {
                // the real header is missing or broken, do not return a half filled entry
                return false;
            }
            // overwrite the filename
            $return['filename'] = $filename;
        }

        return $return;
    }

    /**
     * Creates a FileInfo object from the given parsed header
     *
     * @param array $header header fields as returned by parseHeader()
     * @return FileInfo
     */
    protected function header2fileinfo($header)
    {
        $fileinfo = new FileInfo();
        $fileinfo->setPath($header['filename']);
        $fileinfo->setMode($header['perm']);
        $fileinfo->setUid($header['uid']);
        $fileinfo->setGid($header['gid']);
        $fileinfo->setSize($header['size']);
        $fileinfo->setMtime($header['mtime']);
        $fileinfo->setOwner($header['uname']);
        $fileinfo->setGroup($header['gname']);
        // NOTE(review): every typeflag except '0' (and the empty string) is truthy, so links and other
        // special entries are flagged as directories here - confirm whether this is intended
        $fileinfo->setIsdir((bool) $header['typeflag']);

        return $fileinfo;
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype one of the Archive::COMPRESS_* constants
     * @throws ArchiveIllegalCompressionException
     */
    protected function compressioncheck($comptype)
    {
        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw new ArchiveIllegalCompressionException('No gzip support available');
        }

        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw new ArchiveIllegalCompressionException('No bzip2 support available');
        }
    }

    /**
     * Guesses the wanted compression from the given filename extension
     *
     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int one of the Archive::COMPRESS_* constants
     */
    public function filetype($file)
    {
        $file = strtolower($file);
        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            $comptype = Archive::COMPRESS_GZIP;
        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            $comptype = Archive::COMPRESS_BZIP;
        } else {
            $comptype = Archive::COMPRESS_NONE;
        }
        return $comptype;
    }
}