1<?php
2
3namespace splitbrain\PHPArchive;
4
5/**
6 * Class Tar
7 *
8 * Creates or extracts Tar archives. Supports gz and bzip compression
9 *
10 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
11 *
12 * @author  Andreas Gohr <andi@splitbrain.org>
13 * @package splitbrain\PHPArchive
14 * @license MIT
15 */
16class Tar extends Archive
17{
18
    /** @var string path of the archive file; empty when the archive is built in memory */
    protected $file = '';
    /** @var int compression type, one of the Archive::COMPRESS_* constants */
    protected $comptype = Archive::COMPRESS_AUTO;
    /** @var int compression level as validated by setCompression() */
    protected $complevel = 9;
    /** @var resource|int|false handle returned by gzopen()/bzopen()/fopen(); reset to 0 by create() and close() */
    protected $fh;
    /** @var string archive data collected by writebytes() when no file is set */
    protected $memory = '';
    /** @var bool true while no archive is open; read and add operations refuse to work when set */
    protected $closed = true;
    /** @var bool set by create(); makes close() append the end-of-archive footer */
    protected $writeaccess = false;
26
27    /**
28     * Sets the compression to use
29     *
30     * @param int $level Compression level (0 to 9)
31     * @param int $type Type of compression to use (use COMPRESS_* constants)
32     * @throws ArchiveIllegalCompressionException
33     */
34    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
35    {
36        $this->compressioncheck($type);
37        if ($level < -1 || $level > 9) {
38            throw new ArchiveIllegalCompressionException('Compression level should be between -1 and 9');
39        }
40        $this->comptype  = $type;
41        $this->complevel = $level;
42        if($level == 0) $this->comptype = Archive::COMPRESS_NONE;
43        if($type == Archive::COMPRESS_NONE) $this->complevel = 0;
44    }
45
46    /**
47     * Open an existing TAR file for reading
48     *
49     * @param string $file
50     * @throws ArchiveIOException
51     * @throws ArchiveIllegalCompressionException
52     */
53    public function open($file)
54    {
55        $this->file = $file;
56
57        // update compression to mach file
58        if ($this->comptype == Tar::COMPRESS_AUTO) {
59            $this->setCompression($this->complevel, $this->filetype($file));
60        }
61
62        // open file handles
63        if ($this->comptype === Archive::COMPRESS_GZIP) {
64            $this->fh = @gzopen($this->file, 'rb');
65        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
66            $this->fh = @bzopen($this->file, 'r');
67        } else {
68            $this->fh = @fopen($this->file, 'rb');
69        }
70
71        if (!$this->fh) {
72            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
73        }
74        $this->closed = false;
75    }
76
77    /**
78     * Read the contents of a TAR archive
79     *
80     * This function lists the files stored in the archive
81     *
82     * The archive is closed afer reading the contents, because rewinding is not possible in bzip2 streams.
83     * Reopen the file with open() again if you want to do additional operations
84     *
85     * @throws ArchiveIOException
86     * @throws ArchiveCorruptedException
87     * @returns FileInfo[]
88     */
89    public function contents()
90    {
91        if ($this->closed || !$this->file) {
92            throw new ArchiveIOException('Can not read from a closed archive');
93        }
94
95        $result = array();
96        while ($read = $this->readbytes(512)) {
97            $header = $this->parseHeader($read);
98            if (!is_array($header)) {
99                continue;
100            }
101
102            $this->skipbytes(ceil($header['size'] / 512) * 512);
103            $result[] = $this->header2fileinfo($header);
104        }
105
106        $this->close();
107        return $result;
108    }
109
110    /**
111     * Extract an existing TAR archive
112     *
113     * The $strip parameter allows you to strip a certain number of path components from the filenames
114     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
115     * an integer is passed as $strip.
116     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
117     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
118     *
119     * By default this will extract all files found in the archive. You can restrict the output using the $include
120     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
121     * $include is set only files that match this expression will be extracted. Files that match the $exclude
122     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
123     * stripped filenames as described above.
124     *
125     * The archive is closed afer reading the contents, because rewinding is not possible in bzip2 streams.
126     * Reopen the file with open() again if you want to do additional operations
127     *
128     * @param string $outdir the target directory for extracting
129     * @param int|string $strip either the number of path components or a fixed prefix to strip
130     * @param string $exclude a regular expression of files to exclude
131     * @param string $include a regular expression of files to include
132     * @throws ArchiveIOException
133     * @throws ArchiveCorruptedException
134     * @return FileInfo[]
135     */
136    public function extract($outdir, $strip = '', $exclude = '', $include = '')
137    {
138        if ($this->closed || !$this->file) {
139            throw new ArchiveIOException('Can not read from a closed archive');
140        }
141
142        $outdir = rtrim($outdir, '/');
143        @mkdir($outdir, 0777, true);
144        if (!is_dir($outdir)) {
145            throw new ArchiveIOException("Could not create directory '$outdir'");
146        }
147
148        $extracted = array();
149        while ($dat = $this->readbytes(512)) {
150            // read the file header
151            $header = $this->parseHeader($dat);
152            if (!is_array($header)) {
153                continue;
154            }
155            $fileinfo = $this->header2fileinfo($header);
156
157            // apply strip rules
158            $fileinfo->strip($strip);
159
160            // skip unwanted files
161            if (!strlen($fileinfo->getPath()) || !$fileinfo->matchExpression($include, $exclude)) {
162                $this->skipbytes(ceil($header['size'] / 512) * 512);
163                continue;
164            }
165
166            // create output directory
167            $output    = $outdir.'/'.$fileinfo->getPath();
168            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
169            @mkdir($directory, 0777, true);
170
171            // extract data
172            if (!$fileinfo->getIsdir()) {
173                $fp = @fopen($output, "wb");
174                if (!$fp) {
175                    throw new ArchiveIOException('Could not open file for writing: '.$output);
176                }
177
178                $size = floor($header['size'] / 512);
179                for ($i = 0; $i < $size; $i++) {
180                    fwrite($fp, $this->readbytes(512), 512);
181                }
182                if (($header['size'] % 512) != 0) {
183                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
184                }
185
186                fclose($fp);
187                @touch($output, $fileinfo->getMtime());
188                @chmod($output, $fileinfo->getMode());
189            } else {
190                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
191            }
192
193            if(is_callable($this->callback)) {
194                call_user_func($this->callback, $fileinfo);
195            }
196            $extracted[] = $fileinfo;
197        }
198
199        $this->close();
200        return $extracted;
201    }
202
203    /**
204     * Create a new TAR file
205     *
206     * If $file is empty, the tar file will be created in memory
207     *
208     * @param string $file
209     * @throws ArchiveIOException
210     * @throws ArchiveIllegalCompressionException
211     */
212    public function create($file = '')
213    {
214        $this->file   = $file;
215        $this->memory = '';
216        $this->fh     = 0;
217
218        if ($this->file) {
219            // determine compression
220            if ($this->comptype == Archive::COMPRESS_AUTO) {
221                $this->setCompression($this->complevel, $this->filetype($file));
222            }
223
224            if ($this->comptype === Archive::COMPRESS_GZIP) {
225                $this->fh = @gzopen($this->file, 'wb'.$this->complevel);
226            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
227                $this->fh = @bzopen($this->file, 'w');
228            } else {
229                $this->fh = @fopen($this->file, 'wb');
230            }
231
232            if (!$this->fh) {
233                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
234            }
235        }
236        $this->writeaccess = true;
237        $this->closed      = false;
238    }
239
240    /**
241     * Add a file to the current TAR archive using an existing file in the filesystem
242     *
243     * @param string $file path to the original file
244     * @param string|FileInfo $fileinfo either the name to us in archive (string) or a FileInfo oject with all meta data, empty to take from original
245     * @throws ArchiveCorruptedException when the file changes while reading it, the archive will be corrupt and should be deleted
246     * @throws ArchiveIOException there was trouble reading the given file, it was not added
247     * @throws FileInfoException trouble reading file info, it was not added
248     */
249    public function addFile($file, $fileinfo = '')
250    {
251        if (is_string($fileinfo)) {
252            $fileinfo = FileInfo::fromPath($file, $fileinfo);
253        }
254
255        if ($this->closed) {
256            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
257        }
258
259        // create file header
260        $this->writeFileHeader($fileinfo);
261
262        // write data, but only if we have data to write.
263        // note: on Windows fopen() on a directory will fail, so we prevent
264        // errors on Windows by testing if we have data to write.
265        if (!$fileinfo->getIsdir() && $fileinfo->getSize() > 0) {
266            $read = 0;
267            $fp = @fopen($file, 'rb');
268            if (!$fp) {
269                throw new ArchiveIOException('Could not open file for reading: ' . $file);
270            }
271            while (!feof($fp)) {
272                $data = fread($fp, 512);
273                $read += strlen($data);
274                if ($data === false) {
275                    break;
276                }
277                if ($data === '') {
278                    break;
279                }
280                $packed = pack("a512", $data);
281                $this->writebytes($packed);
282            }
283            fclose($fp);
284
285            if ($read != $fileinfo->getSize()) {
286                $this->close();
287                throw new ArchiveCorruptedException("The size of $file changed while reading, archive corrupted. read $read expected ".$fileinfo->getSize());
288            }
289        }
290
291        if(is_callable($this->callback)) {
292            call_user_func($this->callback, $fileinfo);
293        }
294    }
295
296    /**
297     * Add a file to the current TAR archive using the given $data as content
298     *
299     * @param string|FileInfo $fileinfo either the name to us in archive (string) or a FileInfo oject with all meta data
300     * @param string          $data     binary content of the file to add
301     * @throws ArchiveIOException
302     */
303    public function addData($fileinfo, $data)
304    {
305        if (is_string($fileinfo)) {
306            $fileinfo = new FileInfo($fileinfo);
307        }
308
309        if ($this->closed) {
310            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
311        }
312
313        $len = strlen($data);
314        $fileinfo->setSize($len);
315        $this->writeFileHeader($fileinfo);
316
317        for ($s = 0; $s < $len; $s += 512) {
318            $this->writebytes(pack("a512", substr($data, $s, 512)));
319        }
320
321        if (is_callable($this->callback)) {
322            call_user_func($this->callback, $fileinfo);
323        }
324    }
325
326    /**
327     * Add the closing footer to the archive if in write mode, close all file handles
328     *
329     * After a call to this function no more data can be added to the archive, for
330     * read access no reading is allowed anymore
331     *
332     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
333     * consists of two 512 blocks of zero bytes"
334     *
335     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
336     * @throws ArchiveIOException
337     */
338    public function close()
339    {
340        if ($this->closed) {
341            return;
342        } // we did this already
343
344        // write footer
345        if ($this->writeaccess) {
346            $this->writebytes(pack("a512", ""));
347            $this->writebytes(pack("a512", ""));
348        }
349
350        // close file handles
351        if ($this->file) {
352            if ($this->comptype === Archive::COMPRESS_GZIP) {
353                gzclose($this->fh);
354            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
355                bzclose($this->fh);
356            } else {
357                fclose($this->fh);
358            }
359
360            $this->file = '';
361            $this->fh   = 0;
362        }
363
364        $this->writeaccess = false;
365        $this->closed      = true;
366    }
367
368    /**
369     * Returns the created in-memory archive data
370     *
371     * This implicitly calls close() on the Archive
372     * @throws ArchiveIOException
373     */
374    public function getArchive()
375    {
376        $this->close();
377
378        if ($this->comptype === Archive::COMPRESS_AUTO) {
379            $this->comptype = Archive::COMPRESS_NONE;
380        }
381
382        if ($this->comptype === Archive::COMPRESS_GZIP) {
383            return gzencode($this->memory, $this->complevel);
384        }
385        if ($this->comptype === Archive::COMPRESS_BZIP) {
386            return bzcompress($this->memory);
387        }
388        return $this->memory;
389    }
390
391    /**
392     * Save the created in-memory archive data
393     *
394     * Note: It more memory effective to specify the filename in the create() function and
395     * let the library work on the new file directly.
396     *
397     * @param string $file
398     * @throws ArchiveIOException
399     * @throws ArchiveIllegalCompressionException
400     */
401    public function save($file)
402    {
403        if ($this->comptype === Archive::COMPRESS_AUTO) {
404            $this->setCompression($this->complevel, $this->filetype($file));
405        }
406
407        if (!@file_put_contents($file, $this->getArchive())) {
408            throw new ArchiveIOException('Could not write to file: '.$file);
409        }
410    }
411
412    /**
413     * Read from the open file pointer
414     *
415     * @param int $length bytes to read
416     * @return string
417     */
418    protected function readbytes($length)
419    {
420        if ($this->comptype === Archive::COMPRESS_GZIP) {
421            return @gzread($this->fh, $length);
422        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
423            return @bzread($this->fh, $length);
424        } else {
425            return @fread($this->fh, $length);
426        }
427    }
428
429    /**
430     * Write to the open filepointer or memory
431     *
432     * @param string $data
433     * @throws ArchiveIOException
434     * @return int number of bytes written
435     */
436    protected function writebytes($data)
437    {
438        if (!$this->file) {
439            $this->memory .= $data;
440            $written = strlen($data);
441        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
442            $written = @gzwrite($this->fh, $data);
443        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
444            $written = @bzwrite($this->fh, $data);
445        } else {
446            $written = @fwrite($this->fh, $data);
447        }
448        if ($written === false) {
449            throw new ArchiveIOException('Failed to write to archive stream');
450        }
451        return $written;
452    }
453
454    /**
455     * Skip forward in the open file pointer
456     *
457     * This is basically a wrapper around seek() (and a workaround for bzip2)
458     *
459     * @param int $bytes seek to this position
460     */
461    protected function skipbytes($bytes)
462    {
463        if ($this->comptype === Archive::COMPRESS_GZIP) {
464            @gzseek($this->fh, $bytes, SEEK_CUR);
465        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
466            // there is no seek in bzip2, we simply read on
467            // bzread allows to read a max of 8kb at once
468            while($bytes) {
469                $toread = min(8192, $bytes);
470                @bzread($this->fh, $toread);
471                $bytes -= $toread;
472            }
473        } else {
474            @fseek($this->fh, $bytes, SEEK_CUR);
475        }
476    }
477
478    /**
479     * Write the given file meta data as header
480     *
481     * @param FileInfo $fileinfo
482     * @throws ArchiveIOException
483     */
484    protected function writeFileHeader(FileInfo $fileinfo)
485    {
486        $this->writeRawFileHeader(
487            $fileinfo->getPath(),
488            $fileinfo->getUid(),
489            $fileinfo->getGid(),
490            $fileinfo->getMode(),
491            $fileinfo->getSize(),
492            $fileinfo->getMtime(),
493            $fileinfo->getIsdir() ? '5' : '0'
494        );
495    }
496
497    /**
498     * Write a file header to the stream
499     *
500     * @param string $name
501     * @param int $uid
502     * @param int $gid
503     * @param int $perm
504     * @param int $size
505     * @param int $mtime
506     * @param string $typeflag Set to '5' for directories
507     * @throws ArchiveIOException
508     */
509    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
510    {
511        // handle filename length restrictions
512        $prefix  = '';
513        $namelen = strlen($name);
514        if ($namelen > 100) {
515            $file = basename($name);
516            $dir  = dirname($name);
517            if (strlen($file) > 100 || strlen($dir) > 155) {
518                // we're still too large, let's use GNU longlink
519                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
520                for ($s = 0; $s < $namelen; $s += 512) {
521                    $this->writebytes(pack("a512", substr($name, $s, 512)));
522                }
523                $name = substr($name, 0, 100); // cut off name
524            } else {
525                // we're fine when splitting, use POSIX ustar
526                $prefix = $dir;
527                $name   = $file;
528            }
529        }
530
531        // values are needed in octal
532        $uid   = sprintf("%6s ", decoct($uid));
533        $gid   = sprintf("%6s ", decoct($gid));
534        $perm  = sprintf("%6s ", decoct($perm));
535        $size  = sprintf("%11s ", decoct($size));
536        $mtime = sprintf("%11s", decoct($mtime));
537
538        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
539        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");
540
541        for ($i = 0, $chks = 0; $i < 148; $i++) {
542            $chks += ord($data_first[$i]);
543        }
544
545        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
546            $chks += ord($data_last[$j]);
547        }
548
549        $this->writebytes($data_first);
550
551        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
552        $this->writebytes($chks.$data_last);
553    }
554
555    /**
556     * Decode the given tar file header
557     *
558     * @param string $block a 512 byte block containing the header data
559     * @return array|false returns false when this was a null block
560     * @throws ArchiveCorruptedException
561     */
562    protected function parseHeader($block)
563    {
564        if (!$block || strlen($block) != 512) {
565            throw new ArchiveCorruptedException('Unexpected length of header');
566        }
567
568        // null byte blocks are ignored
569        if(trim($block) === '') return false;
570
571        for ($i = 0, $chks = 0; $i < 148; $i++) {
572            $chks += ord($block[$i]);
573        }
574
575        for ($i = 156, $chks += 256; $i < 512; $i++) {
576            $chks += ord($block[$i]);
577        }
578
579        $header = @unpack(
580            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
581            $block
582        );
583        if (!$header) {
584            throw new ArchiveCorruptedException('Failed to parse header');
585        }
586
587        $return['checksum'] = OctDec(trim($header['checksum']));
588        if ($return['checksum'] != $chks) {
589            throw new ArchiveCorruptedException('Header does not match its checksum');
590        }
591
592        $return['filename'] = trim($header['filename']);
593        $return['perm']     = OctDec(trim($header['perm']));
594        $return['uid']      = OctDec(trim($header['uid']));
595        $return['gid']      = OctDec(trim($header['gid']));
596        $return['size']     = OctDec(trim($header['size']));
597        $return['mtime']    = OctDec(trim($header['mtime']));
598        $return['typeflag'] = $header['typeflag'];
599        $return['link']     = trim($header['link']);
600        $return['uname']    = trim($header['uname']);
601        $return['gname']    = trim($header['gname']);
602
603        // Handle ustar Posix compliant path prefixes
604        if (trim($header['prefix'])) {
605            $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
606        }
607
608        // Handle Long-Link entries from GNU Tar
609        if ($return['typeflag'] == 'L') {
610            // following data block(s) is the filename
611            $filename = trim($this->readbytes(ceil($return['size'] / 512) * 512));
612            // next block is the real header
613            $block  = $this->readbytes(512);
614            $return = $this->parseHeader($block);
615            // overwrite the filename
616            $return['filename'] = $filename;
617        }
618
619        return $return;
620    }
621
622    /**
623     * Creates a FileInfo object from the given parsed header
624     *
625     * @param $header
626     * @return FileInfo
627     */
628    protected function header2fileinfo($header)
629    {
630        $fileinfo = new FileInfo();
631        $fileinfo->setPath($header['filename']);
632        $fileinfo->setMode($header['perm']);
633        $fileinfo->setUid($header['uid']);
634        $fileinfo->setGid($header['gid']);
635        $fileinfo->setSize($header['size']);
636        $fileinfo->setMtime($header['mtime']);
637        $fileinfo->setOwner($header['uname']);
638        $fileinfo->setGroup($header['gname']);
639        $fileinfo->setIsdir((bool) $header['typeflag']);
640
641        return $fileinfo;
642    }
643
644    /**
645     * Checks if the given compression type is available and throws an exception if not
646     *
647     * @param $comptype
648     * @throws ArchiveIllegalCompressionException
649     */
650    protected function compressioncheck($comptype)
651    {
652        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
653            throw new ArchiveIllegalCompressionException('No gzip support available');
654        }
655
656        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
657            throw new ArchiveIllegalCompressionException('No bzip2 support available');
658        }
659    }
660
661    /**
662     * Guesses the wanted compression from the given file
663     *
664     * Uses magic bytes for existing files, the file extension otherwise
665     *
666     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
667     *
668     * @param string $file
669     * @return int
670     */
671    public function filetype($file)
672    {
673        // for existing files, try to read the magic bytes
674        if(file_exists($file) && is_readable($file) && filesize($file) > 5) {
675            $fh = @fopen($file, 'rb');
676            if(!$fh) return false;
677            $magic = fread($fh, 5);
678            fclose($fh);
679
680            if(strpos($magic, "\x42\x5a") === 0) return Archive::COMPRESS_BZIP;
681            if(strpos($magic, "\x1f\x8b") === 0) return Archive::COMPRESS_GZIP;
682        }
683
684        // otherwise rely on file name
685        $file = strtolower($file);
686        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
687            return Archive::COMPRESS_GZIP;
688        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
689            return Archive::COMPRESS_BZIP;
690        }
691
692        return Archive::COMPRESS_NONE;
693    }
694
695}
696