xref: /dokuwiki/vendor/splitbrain/php-archive/src/Tar.php (revision 70519db93fdd6d635397c1354a33f2b1071e2d13)
1<?php
2
3namespace splitbrain\PHPArchive;
4
5/**
6 * Class Tar
7 *
8 * Creates or extracts Tar archives. Supports gz and bzip compression
9 *
10 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
11 *
12 * @author  Andreas Gohr <andi@splitbrain.org>
13 * @package splitbrain\PHPArchive
14 * @license MIT
15 */
16class Tar extends Archive
17{
18
19    protected $file = '';
20    protected $comptype = Archive::COMPRESS_AUTO;
21    protected $complevel = 9;
22    protected $fh;
23    protected $memory = '';
24    protected $closed = true;
25    protected $writeaccess = false;
26
27    /**
28     * Sets the compression to use
29     *
30     * @param int $level Compression level (0 to 9)
31     * @param int $type  Type of compression to use (use COMPRESS_* constants)
32     * @return mixed
33     */
34    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
35    {
36        $this->compressioncheck($type);
37        $this->comptype  = $type;
38        $this->complevel = $level;
39    }
40
41    /**
42     * Open an existing TAR file for reading
43     *
44     * @param string $file
45     * @throws ArchiveIOException
46     */
47    public function open($file)
48    {
49        $this->file = $file;
50
51        // update compression to mach file
52        if ($this->comptype == Tar::COMPRESS_AUTO) {
53            $this->setCompression($this->complevel, $this->filetype($file));
54        }
55
56        // open file handles
57        if ($this->comptype === Archive::COMPRESS_GZIP) {
58            $this->fh = @gzopen($this->file, 'rb');
59        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
60            $this->fh = @bzopen($this->file, 'r');
61        } else {
62            $this->fh = @fopen($this->file, 'rb');
63        }
64
65        if (!$this->fh) {
66            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
67        }
68        $this->closed = false;
69    }
70
71    /**
72     * Read the contents of a TAR archive
73     *
74     * This function lists the files stored in the archive
75     *
76     * The archive is closed afer reading the contents, because rewinding is not possible in bzip2 streams.
77     * Reopen the file with open() again if you want to do additional operations
78     *
79     * @throws ArchiveIOException
80     * @returns FileInfo[]
81     */
82    public function contents()
83    {
84        if ($this->closed || !$this->file) {
85            throw new ArchiveIOException('Can not read from a closed archive');
86        }
87
88        $result = array();
89        while ($read = $this->readbytes(512)) {
90            $header = $this->parseHeader($read);
91            if (!is_array($header)) {
92                continue;
93            }
94
95            $this->skipbytes(ceil($header['size'] / 512) * 512);
96            $result[] = $this->header2fileinfo($header);
97        }
98
99        $this->close();
100        return $result;
101    }
102
103    /**
104     * Extract an existing TAR archive
105     *
106     * The $strip parameter allows you to strip a certain number of path components from the filenames
107     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
108     * an integer is passed as $strip.
109     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
110     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
111     *
112     * By default this will extract all files found in the archive. You can restrict the output using the $include
113     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
114     * $include is set only files that match this expression will be extracted. Files that match the $exclude
115     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
116     * stripped filenames as described above.
117     *
118     * The archive is closed afer reading the contents, because rewinding is not possible in bzip2 streams.
119     * Reopen the file with open() again if you want to do additional operations
120     *
121     * @param string     $outdir  the target directory for extracting
122     * @param int|string $strip   either the number of path components or a fixed prefix to strip
123     * @param string     $exclude a regular expression of files to exclude
124     * @param string     $include a regular expression of files to include
125     * @throws ArchiveIOException
126     * @return FileInfo[]
127     */
128    public function extract($outdir, $strip = '', $exclude = '', $include = '')
129    {
130        if ($this->closed || !$this->file) {
131            throw new ArchiveIOException('Can not read from a closed archive');
132        }
133
134        $outdir = rtrim($outdir, '/');
135        @mkdir($outdir, 0777, true);
136        if (!is_dir($outdir)) {
137            throw new ArchiveIOException("Could not create directory '$outdir'");
138        }
139
140        $extracted = array();
141        while ($dat = $this->readbytes(512)) {
142            // read the file header
143            $header = $this->parseHeader($dat);
144            if (!is_array($header)) {
145                continue;
146            }
147            $fileinfo = $this->header2fileinfo($header);
148
149            // apply strip rules
150            $fileinfo->strip($strip);
151
152            // skip unwanted files
153            if (!strlen($fileinfo->getPath()) || !$fileinfo->match($include, $exclude)) {
154                $this->skipbytes(ceil($header['size'] / 512) * 512);
155                continue;
156            }
157
158            // create output directory
159            $output    = $outdir.'/'.$fileinfo->getPath();
160            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
161            @mkdir($directory, 0777, true);
162
163            // extract data
164            if (!$fileinfo->getIsdir()) {
165                $fp = fopen($output, "wb");
166                if (!$fp) {
167                    throw new ArchiveIOException('Could not open file for writing: '.$output);
168                }
169
170                $size = floor($header['size'] / 512);
171                for ($i = 0; $i < $size; $i++) {
172                    fwrite($fp, $this->readbytes(512), 512);
173                }
174                if (($header['size'] % 512) != 0) {
175                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
176                }
177
178                fclose($fp);
179                touch($output, $fileinfo->getMtime());
180                chmod($output, $fileinfo->getMode());
181            } else {
182                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
183            }
184
185            $extracted[] = $fileinfo;
186        }
187
188        $this->close();
189        return $extracted;
190    }
191
192    /**
193     * Create a new TAR file
194     *
195     * If $file is empty, the tar file will be created in memory
196     *
197     * @param string $file
198     * @throws ArchiveIOException
199     */
200    public function create($file = '')
201    {
202        $this->file   = $file;
203        $this->memory = '';
204        $this->fh     = 0;
205
206        if ($this->file) {
207            // determine compression
208            if ($this->comptype == Archive::COMPRESS_AUTO) {
209                $this->setCompression($this->complevel, $this->filetype($file));
210            }
211
212            if ($this->comptype === Archive::COMPRESS_GZIP) {
213                $this->fh = @gzopen($this->file, 'wb'.$this->complevel);
214            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
215                $this->fh = @bzopen($this->file, 'w');
216            } else {
217                $this->fh = @fopen($this->file, 'wb');
218            }
219
220            if (!$this->fh) {
221                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
222            }
223        }
224        $this->writeaccess = true;
225        $this->closed      = false;
226    }
227
228    /**
229     * Add a file to the current TAR archive using an existing file in the filesystem
230     *
231     * @param string          $file     path to the original file
232     * @param string|FileInfo $fileinfo either the name to us in archive (string) or a FileInfo oject with all meta data, empty to take from original
233     * @throws ArchiveIOException
234     */
235    public function addFile($file, $fileinfo = '')
236    {
237        if (is_string($fileinfo)) {
238            $fileinfo = FileInfo::fromPath($file, $fileinfo);
239        }
240
241        if ($this->closed) {
242            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
243        }
244
245        $fp = fopen($file, 'rb');
246        if (!$fp) {
247            throw new ArchiveIOException('Could not open file for reading: '.$file);
248        }
249
250        // create file header
251        $this->writeFileHeader($fileinfo);
252
253        // write data
254        while (!feof($fp)) {
255            $data = fread($fp, 512);
256            if ($data === false) {
257                break;
258            }
259            if ($data === '') {
260                break;
261            }
262            $packed = pack("a512", $data);
263            $this->writebytes($packed);
264        }
265        fclose($fp);
266    }
267
268    /**
269     * Add a file to the current TAR archive using the given $data as content
270     *
271     * @param string|FileInfo $fileinfo either the name to us in archive (string) or a FileInfo oject with all meta data
272     * @param string          $data     binary content of the file to add
273     * @throws ArchiveIOException
274     */
275    public function addData($fileinfo, $data)
276    {
277        if (is_string($fileinfo)) {
278            $fileinfo = new FileInfo($fileinfo);
279        }
280
281        if ($this->closed) {
282            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
283        }
284
285        $len = strlen($data);
286        $fileinfo->setSize($len);
287        $this->writeFileHeader($fileinfo);
288
289        for ($s = 0; $s < $len; $s += 512) {
290            $this->writebytes(pack("a512", substr($data, $s, 512)));
291        }
292    }
293
294    /**
295     * Add the closing footer to the archive if in write mode, close all file handles
296     *
297     * After a call to this function no more data can be added to the archive, for
298     * read access no reading is allowed anymore
299     *
300     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
301     * consists of two 512 blocks of zero bytes"
302     *
303     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
304     */
305    public function close()
306    {
307        if ($this->closed) {
308            return;
309        } // we did this already
310
311        // write footer
312        if ($this->writeaccess) {
313            $this->writebytes(pack("a512", ""));
314            $this->writebytes(pack("a512", ""));
315        }
316
317        // close file handles
318        if ($this->file) {
319            if ($this->comptype === Archive::COMPRESS_GZIP) {
320                gzclose($this->fh);
321            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
322                bzclose($this->fh);
323            } else {
324                fclose($this->fh);
325            }
326
327            $this->file = '';
328            $this->fh   = 0;
329        }
330
331        $this->writeaccess = false;
332        $this->closed      = true;
333    }
334
335    /**
336     * Returns the created in-memory archive data
337     *
338     * This implicitly calls close() on the Archive
339     */
340    public function getArchive()
341    {
342        $this->close();
343
344        if ($this->comptype === Archive::COMPRESS_AUTO) {
345            $this->comptype = Archive::COMPRESS_NONE;
346        }
347
348        if ($this->comptype === Archive::COMPRESS_GZIP) {
349            return gzcompress($this->memory, $this->complevel);
350        }
351        if ($this->comptype === Archive::COMPRESS_BZIP) {
352            return bzcompress($this->memory);
353        }
354        return $this->memory;
355    }
356
357    /**
358     * Save the created in-memory archive data
359     *
360     * Note: It more memory effective to specify the filename in the create() function and
361     * let the library work on the new file directly.
362     *
363     * @param string $file
364     * @throws ArchiveIOException
365     */
366    public function save($file)
367    {
368        if ($this->comptype === Archive::COMPRESS_AUTO) {
369            $this->setCompression($this->filetype($this->complevel, $file));
370        }
371
372        if (!file_put_contents($file, $this->getArchive())) {
373            throw new ArchiveIOException('Could not write to file: '.$file);
374        }
375    }
376
377    /**
378     * Read from the open file pointer
379     *
380     * @param int $length bytes to read
381     * @return string
382     */
383    protected function readbytes($length)
384    {
385        if ($this->comptype === Archive::COMPRESS_GZIP) {
386            return @gzread($this->fh, $length);
387        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
388            return @bzread($this->fh, $length);
389        } else {
390            return @fread($this->fh, $length);
391        }
392    }
393
394    /**
395     * Write to the open filepointer or memory
396     *
397     * @param string $data
398     * @throws ArchiveIOException
399     * @return int number of bytes written
400     */
401    protected function writebytes($data)
402    {
403        if (!$this->file) {
404            $this->memory .= $data;
405            $written = strlen($data);
406        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
407            $written = @gzwrite($this->fh, $data);
408        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
409            $written = @bzwrite($this->fh, $data);
410        } else {
411            $written = @fwrite($this->fh, $data);
412        }
413        if ($written === false) {
414            throw new ArchiveIOException('Failed to write to archive stream');
415        }
416        return $written;
417    }
418
419    /**
420     * Skip forward in the open file pointer
421     *
422     * This is basically a wrapper around seek() (and a workaround for bzip2)
423     *
424     * @param int $bytes seek to this position
425     */
426    function skipbytes($bytes)
427    {
428        if ($this->comptype === Archive::COMPRESS_GZIP) {
429            @gzseek($this->fh, $bytes, SEEK_CUR);
430        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
431            // there is no seek in bzip2, we simply read on
432            @bzread($this->fh, $bytes);
433        } else {
434            @fseek($this->fh, $bytes, SEEK_CUR);
435        }
436    }
437
438    /**
439     * Write the given file metat data as header
440     *
441     * @param FileInfo $fileinfo
442     */
443    protected function writeFileHeader(FileInfo $fileinfo)
444    {
445        $this->writeRawFileHeader(
446            $fileinfo->getPath(),
447            $fileinfo->getUid(),
448            $fileinfo->getGid(),
449            $fileinfo->getMode(),
450            $fileinfo->getSize(),
451            $fileinfo->getMtime(),
452            $fileinfo->getIsdir() ? '5' : '0'
453        );
454    }
455
456    /**
457     * Write a file header to the stream
458     *
459     * @param string $name
460     * @param int    $uid
461     * @param int    $gid
462     * @param int    $perm
463     * @param int    $size
464     * @param int    $mtime
465     * @param string $typeflag Set to '5' for directories
466     */
467    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
468    {
469        // handle filename length restrictions
470        $prefix  = '';
471        $namelen = strlen($name);
472        if ($namelen > 100) {
473            $file = basename($name);
474            $dir  = dirname($name);
475            if (strlen($file) > 100 || strlen($dir) > 155) {
476                // we're still too large, let's use GNU longlink
477                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
478                for ($s = 0; $s < $namelen; $s += 512) {
479                    $this->writebytes(pack("a512", substr($name, $s, 512)));
480                }
481                $name = substr($name, 0, 100); // cut off name
482            } else {
483                // we're fine when splitting, use POSIX ustar
484                $prefix = $dir;
485                $name   = $file;
486            }
487        }
488
489        // values are needed in octal
490        $uid   = sprintf("%6s ", decoct($uid));
491        $gid   = sprintf("%6s ", decoct($gid));
492        $perm  = sprintf("%6s ", decoct($perm));
493        $size  = sprintf("%11s ", decoct($size));
494        $mtime = sprintf("%11s", decoct($mtime));
495
496        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
497        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");
498
499        for ($i = 0, $chks = 0; $i < 148; $i++) {
500            $chks += ord($data_first[$i]);
501        }
502
503        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
504            $chks += ord($data_last[$j]);
505        }
506
507        $this->writebytes($data_first);
508
509        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
510        $this->writebytes($chks.$data_last);
511    }
512
513    /**
514     * Decode the given tar file header
515     *
516     * @param string $block a 512 byte block containign the header data
517     * @return array|bool
518     */
519    protected function parseHeader($block)
520    {
521        if (!$block || strlen($block) != 512) {
522            return false;
523        }
524
525        for ($i = 0, $chks = 0; $i < 148; $i++) {
526            $chks += ord($block[$i]);
527        }
528
529        for ($i = 156, $chks += 256; $i < 512; $i++) {
530            $chks += ord($block[$i]);
531        }
532
533        $header = @unpack(
534            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
535            $block
536        );
537        if (!$header) {
538            return false;
539        }
540
541        $return['checksum'] = OctDec(trim($header['checksum']));
542        if ($return['checksum'] != $chks) {
543            return false;
544        }
545
546        $return['filename'] = trim($header['filename']);
547        $return['perm']     = OctDec(trim($header['perm']));
548        $return['uid']      = OctDec(trim($header['uid']));
549        $return['gid']      = OctDec(trim($header['gid']));
550        $return['size']     = OctDec(trim($header['size']));
551        $return['mtime']    = OctDec(trim($header['mtime']));
552        $return['typeflag'] = $header['typeflag'];
553        $return['link']     = trim($header['link']);
554        $return['uname']    = trim($header['uname']);
555        $return['gname']    = trim($header['gname']);
556
557        // Handle ustar Posix compliant path prefixes
558        if (trim($header['prefix'])) {
559            $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
560        }
561
562        // Handle Long-Link entries from GNU Tar
563        if ($return['typeflag'] == 'L') {
564            // following data block(s) is the filename
565            $filename = trim($this->readbytes(ceil($header['size'] / 512) * 512));
566            // next block is the real header
567            $block  = $this->readbytes(512);
568            $return = $this->parseHeader($block);
569            // overwrite the filename
570            $return['filename'] = $filename;
571        }
572
573        return $return;
574    }
575
576    /**
577     * Creates a FileInfo object from the given parsed header
578     *
579     * @param $header
580     * @return FileInfo
581     */
582    protected function header2fileinfo($header)
583    {
584        $fileinfo = new FileInfo();
585        $fileinfo->setPath($header['filename']);
586        $fileinfo->setMode($header['perm']);
587        $fileinfo->setUid($header['uid']);
588        $fileinfo->setGid($header['gid']);
589        $fileinfo->setSize($header['size']);
590        $fileinfo->setMtime($header['mtime']);
591        $fileinfo->setOwner($header['uname']);
592        $fileinfo->setGroup($header['gname']);
593        $fileinfo->setIsdir((bool) $header['typeflag']);
594
595        return $fileinfo;
596    }
597
598    /**
599     * Checks if the given compression type is available and throws an exception if not
600     *
601     * @param $comptype
602     * @throws ArchiveIllegalCompressionException
603     */
604    protected function compressioncheck($comptype)
605    {
606        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
607            throw new ArchiveIllegalCompressionException('No gzip support available');
608        }
609
610        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
611            throw new ArchiveIllegalCompressionException('No bzip2 support available');
612        }
613    }
614
615    /**
616     * Guesses the wanted compression from the given filename extension
617     *
618     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
619     *
620     * @param string $file
621     * @return int
622     */
623    public function filetype($file)
624    {
625        $file = strtolower($file);
626        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
627            $comptype = Archive::COMPRESS_GZIP;
628        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
629            $comptype = Archive::COMPRESS_BZIP;
630        } else {
631            $comptype = Archive::COMPRESS_NONE;
632        }
633        return $comptype;
634    }
635}
636