1<?php
2
3namespace splitbrain\PHPArchive;
4
5/**
6 * Class Tar
7 *
8 * Creates or extracts Tar archives. Supports gz and bzip compression
9 *
10 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
11 *
12 * @author  Andreas Gohr <andi@splitbrain.org>
13 * @package splitbrain\PHPArchive
14 * @license MIT
15 */
16class Tar extends Archive
17{
18
19    protected $file = '';
20    protected $comptype = Archive::COMPRESS_AUTO;
21    protected $complevel = 9;
22    protected $fh;
23    protected $memory = '';
24    protected $closed = true;
25    protected $writeaccess = false;
26
27    /**
28     * Sets the compression to use
29     *
30     * @param int $level Compression level (0 to 9)
31     * @param int $type Type of compression to use (use COMPRESS_* constants)
32     * @throws ArchiveIllegalCompressionException
33     */
34    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
35    {
36        $this->compressioncheck($type);
37        if ($level < -1 || $level > 9) {
38            throw new ArchiveIllegalCompressionException('Compression level should be between -1 and 9');
39        }
40        $this->comptype  = $type;
41        $this->complevel = $level;
42        if($level == 0) $this->comptype = Archive::COMPRESS_NONE;
43        if($type == Archive::COMPRESS_NONE) $this->complevel = 0;
44    }
45
46    /**
47     * Open an existing TAR file for reading
48     *
49     * @param string $file
50     * @throws ArchiveIOException
51     * @throws ArchiveIllegalCompressionException
52     */
53    public function open($file)
54    {
55        $this->file = $file;
56
57        // update compression to mach file
58        if ($this->comptype == Tar::COMPRESS_AUTO) {
59            $this->setCompression($this->complevel, $this->filetype($file));
60        }
61
62        // open file handles
63        if ($this->comptype === Archive::COMPRESS_GZIP) {
64            $this->fh = @gzopen($this->file, 'rb');
65        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
66            $this->fh = @bzopen($this->file, 'r');
67        } else {
68            $this->fh = @fopen($this->file, 'rb');
69        }
70
71        if (!$this->fh) {
72            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
73        }
74        $this->closed = false;
75    }
76
77    /**
78     * Read the contents of a TAR archive
79     *
80     * This function lists the files stored in the archive
81     *
82     * The archive is closed afer reading the contents, because rewinding is not possible in bzip2 streams.
83     * Reopen the file with open() again if you want to do additional operations
84     *
85     * @throws ArchiveIOException
86     * @throws ArchiveCorruptedException
87     * @returns FileInfo[]
88     */
89    public function contents()
90    {
91        $result = array();
92
93        foreach ($this->yieldContents() as $fileinfo) {
94            $result[] = $fileinfo;
95        }
96
97        return $result;
98    }
99
100    /**
101     * Read the contents of a TAR archive and return each entry using yield
102     * for memory efficiency.
103     *
104     * @see contents()
105     * @throws ArchiveIOException
106     * @throws ArchiveCorruptedException
107     * @returns FileInfo[]
108     */
109    public function yieldContents()
110    {
111        if ($this->closed || !$this->file) {
112            throw new ArchiveIOException('Can not read from a closed archive');
113        }
114
115        while ($read = $this->readbytes(512)) {
116            $header = $this->parseHeader($read);
117            if (!is_array($header)) {
118                continue;
119            }
120
121            $this->skipbytes(ceil($header['size'] / 512) * 512);
122            yield $this->header2fileinfo($header);
123        }
124
125        $this->close();
126
127    }
128
129    /**
130     * Extract an existing TAR archive
131     *
132     * The $strip parameter allows you to strip a certain number of path components from the filenames
133     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
134     * an integer is passed as $strip.
135     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
136     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
137     *
138     * By default this will extract all files found in the archive. You can restrict the output using the $include
139     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
140     * $include is set only files that match this expression will be extracted. Files that match the $exclude
141     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
142     * stripped filenames as described above.
143     *
144     * The archive is closed afer reading the contents, because rewinding is not possible in bzip2 streams.
145     * Reopen the file with open() again if you want to do additional operations
146     *
147     * @param string $outdir the target directory for extracting
148     * @param int|string $strip either the number of path components or a fixed prefix to strip
149     * @param string $exclude a regular expression of files to exclude
150     * @param string $include a regular expression of files to include
151     * @throws ArchiveIOException
152     * @throws ArchiveCorruptedException
153     * @return FileInfo[]
154     */
155    public function extract($outdir, $strip = '', $exclude = '', $include = '')
156    {
157        if ($this->closed || !$this->file) {
158            throw new ArchiveIOException('Can not read from a closed archive');
159        }
160
161        $outdir = rtrim($outdir, '/');
162        @mkdir($outdir, 0777, true);
163        if (!is_dir($outdir)) {
164            throw new ArchiveIOException("Could not create directory '$outdir'");
165        }
166
167        $extracted = array();
168        while ($dat = $this->readbytes(512)) {
169            // read the file header
170            $header = $this->parseHeader($dat);
171            if (!is_array($header)) {
172                continue;
173            }
174            $fileinfo = $this->header2fileinfo($header);
175
176            // apply strip rules
177            $fileinfo->strip($strip);
178
179            // skip unwanted files
180            if (!strlen($fileinfo->getPath()) || !$fileinfo->matchExpression($include, $exclude)) {
181                $this->skipbytes(ceil($header['size'] / 512) * 512);
182                continue;
183            }
184
185            // create output directory
186            $output    = $outdir.'/'.$fileinfo->getPath();
187            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
188            if (!file_exists($directory)) {
189                mkdir($directory, 0777, true);
190            }
191
192            // extract data
193            if (!$fileinfo->getIsdir()) {
194                $fp = @fopen($output, "wb");
195                if (!$fp) {
196                    throw new ArchiveIOException('Could not open file for writing: '.$output);
197                }
198
199                $size = floor($header['size'] / 512);
200                for ($i = 0; $i < $size; $i++) {
201                    fwrite($fp, $this->readbytes(512), 512);
202                }
203                if (($header['size'] % 512) != 0) {
204                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
205                }
206
207                fclose($fp);
208                @touch($output, $fileinfo->getMtime());
209                @chmod($output, $fileinfo->getMode());
210            } else {
211                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
212            }
213
214            if(is_callable($this->callback)) {
215                call_user_func($this->callback, $fileinfo);
216            }
217            $extracted[] = $fileinfo;
218        }
219
220        $this->close();
221        return $extracted;
222    }
223
224    /**
225     * Create a new TAR file
226     *
227     * If $file is empty, the tar file will be created in memory
228     *
229     * @param string $file
230     * @throws ArchiveIOException
231     * @throws ArchiveIllegalCompressionException
232     */
233    public function create($file = '')
234    {
235        $this->file   = $file;
236        $this->memory = '';
237        $this->fh     = 0;
238
239        if ($this->file) {
240            // determine compression
241            if ($this->comptype == Archive::COMPRESS_AUTO) {
242                $this->setCompression($this->complevel, $this->filetype($file));
243            }
244
245            if ($this->comptype === Archive::COMPRESS_GZIP) {
246                $this->fh = @gzopen($this->file, 'wb'.$this->complevel);
247            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
248                $this->fh = @bzopen($this->file, 'w');
249            } else {
250                $this->fh = @fopen($this->file, 'wb');
251            }
252
253            if (!$this->fh) {
254                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
255            }
256        }
257        $this->writeaccess = true;
258        $this->closed      = false;
259    }
260
261    /**
262     * Add a file to the current TAR archive using an existing file in the filesystem
263     *
264     * @param string $file path to the original file
265     * @param string|FileInfo $fileinfo either the name to us in archive (string) or a FileInfo oject with all meta data, empty to take from original
266     * @throws ArchiveCorruptedException when the file changes while reading it, the archive will be corrupt and should be deleted
267     * @throws ArchiveIOException there was trouble reading the given file, it was not added
268     * @throws FileInfoException trouble reading file info, it was not added
269     */
270    public function addFile($file, $fileinfo = '')
271    {
272        if (is_string($fileinfo)) {
273            $fileinfo = FileInfo::fromPath($file, $fileinfo);
274        }
275
276        if ($this->closed) {
277            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
278        }
279
280        // create file header
281        $this->writeFileHeader($fileinfo);
282
283        // write data, but only if we have data to write.
284        // note: on Windows fopen() on a directory will fail, so we prevent
285        // errors on Windows by testing if we have data to write.
286        if (!$fileinfo->getIsdir() && $fileinfo->getSize() > 0) {
287            $read = 0;
288            $fp = @fopen($file, 'rb');
289            if (!$fp) {
290                throw new ArchiveIOException('Could not open file for reading: ' . $file);
291            }
292            while (!feof($fp)) {
293                $data = fread($fp, 512);
294                $read += strlen($data);
295                if ($data === false) {
296                    break;
297                }
298                if ($data === '') {
299                    break;
300                }
301                $packed = pack("a512", $data);
302                $this->writebytes($packed);
303            }
304            fclose($fp);
305
306            if ($read != $fileinfo->getSize()) {
307                $this->close();
308                throw new ArchiveCorruptedException("The size of $file changed while reading, archive corrupted. read $read expected ".$fileinfo->getSize());
309            }
310        }
311
312        if(is_callable($this->callback)) {
313            call_user_func($this->callback, $fileinfo);
314        }
315    }
316
317    /**
318     * Add a file to the current TAR archive using the given $data as content
319     *
320     * @param string|FileInfo $fileinfo either the name to us in archive (string) or a FileInfo oject with all meta data
321     * @param string          $data     binary content of the file to add
322     * @throws ArchiveIOException
323     */
324    public function addData($fileinfo, $data)
325    {
326        if (is_string($fileinfo)) {
327            $fileinfo = new FileInfo($fileinfo);
328        }
329
330        if ($this->closed) {
331            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
332        }
333
334        $len = strlen($data);
335        $fileinfo->setSize($len);
336        $this->writeFileHeader($fileinfo);
337
338        for ($s = 0; $s < $len; $s += 512) {
339            $this->writebytes(pack("a512", substr($data, $s, 512)));
340        }
341
342        if (is_callable($this->callback)) {
343            call_user_func($this->callback, $fileinfo);
344        }
345    }
346
347    /**
348     * Add the closing footer to the archive if in write mode, close all file handles
349     *
350     * After a call to this function no more data can be added to the archive, for
351     * read access no reading is allowed anymore
352     *
353     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
354     * consists of two 512 blocks of zero bytes"
355     *
356     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
357     * @throws ArchiveIOException
358     */
359    public function close()
360    {
361        if ($this->closed) {
362            return;
363        } // we did this already
364
365        // write footer
366        if ($this->writeaccess) {
367            $this->writebytes(pack("a512", ""));
368            $this->writebytes(pack("a512", ""));
369        }
370
371        // close file handles
372        if ($this->file) {
373            if ($this->comptype === Archive::COMPRESS_GZIP) {
374                gzclose($this->fh);
375            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
376                bzclose($this->fh);
377            } else {
378                fclose($this->fh);
379            }
380
381            $this->file = '';
382            $this->fh   = 0;
383        }
384
385        $this->writeaccess = false;
386        $this->closed      = true;
387    }
388
389    /**
390     * Returns the created in-memory archive data
391     *
392     * This implicitly calls close() on the Archive
393     * @throws ArchiveIOException
394     */
395    public function getArchive()
396    {
397        $this->close();
398
399        if ($this->comptype === Archive::COMPRESS_AUTO) {
400            $this->comptype = Archive::COMPRESS_NONE;
401        }
402
403        if ($this->comptype === Archive::COMPRESS_GZIP) {
404            return gzencode($this->memory, $this->complevel);
405        }
406        if ($this->comptype === Archive::COMPRESS_BZIP) {
407            return bzcompress($this->memory);
408        }
409        return $this->memory;
410    }
411
412    /**
413     * Save the created in-memory archive data
414     *
415     * Note: It more memory effective to specify the filename in the create() function and
416     * let the library work on the new file directly.
417     *
418     * @param string $file
419     * @throws ArchiveIOException
420     * @throws ArchiveIllegalCompressionException
421     */
422    public function save($file)
423    {
424        if ($this->comptype === Archive::COMPRESS_AUTO) {
425            $this->setCompression($this->complevel, $this->filetype($file));
426        }
427
428        if (!@file_put_contents($file, $this->getArchive())) {
429            throw new ArchiveIOException('Could not write to file: '.$file);
430        }
431    }
432
433    /**
434     * Read from the open file pointer
435     *
436     * @param int $length bytes to read
437     * @return string
438     */
439    protected function readbytes($length)
440    {
441        if ($this->comptype === Archive::COMPRESS_GZIP) {
442            return @gzread($this->fh, $length);
443        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
444            return @bzread($this->fh, $length);
445        } else {
446            return @fread($this->fh, $length);
447        }
448    }
449
450    /**
451     * Write to the open filepointer or memory
452     *
453     * @param string $data
454     * @throws ArchiveIOException
455     * @return int number of bytes written
456     */
457    protected function writebytes($data)
458    {
459        if (!$this->file) {
460            $this->memory .= $data;
461            $written = strlen($data);
462        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
463            $written = @gzwrite($this->fh, $data);
464        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
465            $written = @bzwrite($this->fh, $data);
466        } else {
467            $written = @fwrite($this->fh, $data);
468        }
469        if ($written === false) {
470            throw new ArchiveIOException('Failed to write to archive stream');
471        }
472        return $written;
473    }
474
475    /**
476     * Skip forward in the open file pointer
477     *
478     * This is basically a wrapper around seek() (and a workaround for bzip2)
479     *
480     * @param int $bytes seek to this position
481     */
482    protected function skipbytes($bytes)
483    {
484        if ($this->comptype === Archive::COMPRESS_GZIP) {
485            @gzseek($this->fh, $bytes, SEEK_CUR);
486        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
487            // there is no seek in bzip2, we simply read on
488            // bzread allows to read a max of 8kb at once
489            while($bytes) {
490                $toread = min(8192, $bytes);
491                @bzread($this->fh, $toread);
492                $bytes -= $toread;
493            }
494        } else {
495            @fseek($this->fh, $bytes, SEEK_CUR);
496        }
497    }
498
499    /**
500     * Write the given file meta data as header
501     *
502     * @param FileInfo $fileinfo
503     * @throws ArchiveIOException
504     */
505    protected function writeFileHeader(FileInfo $fileinfo)
506    {
507        $this->writeRawFileHeader(
508            $fileinfo->getPath(),
509            $fileinfo->getUid(),
510            $fileinfo->getGid(),
511            $fileinfo->getMode(),
512            $fileinfo->getSize(),
513            $fileinfo->getMtime(),
514            $fileinfo->getIsdir() ? '5' : '0'
515        );
516    }
517
518    /**
519     * Write a file header to the stream
520     *
521     * @param string $name
522     * @param int $uid
523     * @param int $gid
524     * @param int $perm
525     * @param int $size
526     * @param int $mtime
527     * @param string $typeflag Set to '5' for directories
528     * @throws ArchiveIOException
529     */
530    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
531    {
532        // handle filename length restrictions
533        $prefix  = '';
534        $namelen = strlen($name);
535        if ($namelen > 100) {
536            $file = basename($name);
537            $dir  = dirname($name);
538            if (strlen($file) > 100 || strlen($dir) > 155) {
539                // we're still too large, let's use GNU longlink
540                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
541                for ($s = 0; $s < $namelen; $s += 512) {
542                    $this->writebytes(pack("a512", substr($name, $s, 512)));
543                }
544                $name = substr($name, 0, 100); // cut off name
545            } else {
546                // we're fine when splitting, use POSIX ustar
547                $prefix = $dir;
548                $name   = $file;
549            }
550        }
551
552        // values are needed in octal
553        $uid   = sprintf("%6s ", decoct($uid));
554        $gid   = sprintf("%6s ", decoct($gid));
555        $perm  = sprintf("%6s ", decoct($perm));
556        $size  = sprintf("%11s ", decoct($size));
557        $mtime = sprintf("%11s", decoct($mtime));
558
559        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
560        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");
561
562        for ($i = 0, $chks = 0; $i < 148; $i++) {
563            $chks += ord($data_first[$i]);
564        }
565
566        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
567            $chks += ord($data_last[$j]);
568        }
569
570        $this->writebytes($data_first);
571
572        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
573        $this->writebytes($chks.$data_last);
574    }
575
576    /**
577     * Decode the given tar file header
578     *
579     * @param string $block a 512 byte block containing the header data
580     * @return array|false returns false when this was a null block
581     * @throws ArchiveCorruptedException
582     */
583    protected function parseHeader($block)
584    {
585        if (!$block || strlen($block) != 512) {
586            throw new ArchiveCorruptedException('Unexpected length of header');
587        }
588
589        // null byte blocks are ignored
590        if(trim($block) === '') return false;
591
592        for ($i = 0, $chks = 0; $i < 148; $i++) {
593            $chks += ord($block[$i]);
594        }
595
596        for ($i = 156, $chks += 256; $i < 512; $i++) {
597            $chks += ord($block[$i]);
598        }
599
600        $header = @unpack(
601            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
602            $block
603        );
604        if (!$header) {
605            throw new ArchiveCorruptedException('Failed to parse header');
606        }
607
608        $return['checksum'] = OctDec(trim($header['checksum']));
609        if ($return['checksum'] != $chks) {
610            throw new ArchiveCorruptedException('Header does not match its checksum');
611        }
612
613        $return['filename'] = trim($header['filename']);
614        $return['perm']     = OctDec(trim($header['perm']));
615        $return['uid']      = OctDec(trim($header['uid']));
616        $return['gid']      = OctDec(trim($header['gid']));
617        $return['size']     = OctDec(trim($header['size']));
618        $return['mtime']    = OctDec(trim($header['mtime']));
619        $return['typeflag'] = $header['typeflag'];
620        $return['link']     = trim($header['link']);
621        $return['uname']    = trim($header['uname']);
622        $return['gname']    = trim($header['gname']);
623
624        // Handle ustar Posix compliant path prefixes
625        if (trim($header['prefix'])) {
626            $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
627        }
628
629        // Handle Long-Link entries from GNU Tar
630        if ($return['typeflag'] == 'L') {
631            // following data block(s) is the filename
632            $filename = trim($this->readbytes(ceil($return['size'] / 512) * 512));
633            // next block is the real header
634            $block  = $this->readbytes(512);
635            $return = $this->parseHeader($block);
636            // overwrite the filename
637            $return['filename'] = $filename;
638        }
639
640        return $return;
641    }
642
643    /**
644     * Creates a FileInfo object from the given parsed header
645     *
646     * @param $header
647     * @return FileInfo
648     */
649    protected function header2fileinfo($header)
650    {
651        $fileinfo = new FileInfo();
652        $fileinfo->setPath($header['filename']);
653        $fileinfo->setMode($header['perm']);
654        $fileinfo->setUid($header['uid']);
655        $fileinfo->setGid($header['gid']);
656        $fileinfo->setSize($header['size']);
657        $fileinfo->setMtime($header['mtime']);
658        $fileinfo->setOwner($header['uname']);
659        $fileinfo->setGroup($header['gname']);
660        $fileinfo->setIsdir((bool) $header['typeflag']);
661
662        return $fileinfo;
663    }
664
665    /**
666     * Checks if the given compression type is available and throws an exception if not
667     *
668     * @param $comptype
669     * @throws ArchiveIllegalCompressionException
670     */
671    protected function compressioncheck($comptype)
672    {
673        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
674            throw new ArchiveIllegalCompressionException('No gzip support available');
675        }
676
677        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
678            throw new ArchiveIllegalCompressionException('No bzip2 support available');
679        }
680    }
681
682    /**
683     * Guesses the wanted compression from the given file
684     *
685     * Uses magic bytes for existing files, the file extension otherwise
686     *
687     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
688     *
689     * @param string $file
690     * @return int
691     */
692    public function filetype($file)
693    {
694        // for existing files, try to read the magic bytes
695        if(file_exists($file) && is_readable($file) && filesize($file) > 5) {
696            $fh = @fopen($file, 'rb');
697            if(!$fh) return false;
698            $magic = fread($fh, 5);
699            fclose($fh);
700
701            if(strpos($magic, "\x42\x5a") === 0) return Archive::COMPRESS_BZIP;
702            if(strpos($magic, "\x1f\x8b") === 0) return Archive::COMPRESS_GZIP;
703        }
704
705        // otherwise rely on file name
706        $file = strtolower($file);
707        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
708            return Archive::COMPRESS_GZIP;
709        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
710            return Archive::COMPRESS_BZIP;
711        }
712
713        return Archive::COMPRESS_NONE;
714    }
715
716}
717