xref: /dokuwiki/vendor/splitbrain/php-archive/src/Tar.php (revision 9c9753d6eadb7a5b7495594bd7f1bc4480edd4a2)
1<?php
2
3namespace splitbrain\PHPArchive;
4
5/**
6 * Class Tar
7 *
8 * Creates or extracts Tar archives. Supports gz and bzip compression
9 *
10 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
11 *
12 * @author  Andreas Gohr <andi@splitbrain.org>
13 * @package splitbrain\PHPArchive
14 * @license MIT
15 */
16class Tar extends Archive
17{
18
19    protected $file = '';
20    protected $comptype = Archive::COMPRESS_AUTO;
21    protected $complevel = 9;
22    protected $fh;
23    protected $memory = '';
24    protected $closed = true;
25    protected $writeaccess = false;
26
27    /**
28     * Sets the compression to use
29     *
30     * @param int $level Compression level (0 to 9)
31     * @param int $type  Type of compression to use (use COMPRESS_* constants)
32     * @return mixed
33     */
34    public function setCompression($level = 9, $type = Archive::COMPRESS_AUTO)
35    {
36        $this->compressioncheck($type);
37        $this->comptype  = $type;
38        $this->complevel = $level;
39        if($level == 0) $this->comptype = Archive::COMPRESS_NONE;
40        if($type == Archive::COMPRESS_NONE) $this->complevel = 0;
41    }
42
43    /**
44     * Open an existing TAR file for reading
45     *
46     * @param string $file
47     * @throws ArchiveIOException
48     */
49    public function open($file)
50    {
51        $this->file = $file;
52
53        // update compression to mach file
54        if ($this->comptype == Tar::COMPRESS_AUTO) {
55            $this->setCompression($this->complevel, $this->filetype($file));
56        }
57
58        // open file handles
59        if ($this->comptype === Archive::COMPRESS_GZIP) {
60            $this->fh = @gzopen($this->file, 'rb');
61        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
62            $this->fh = @bzopen($this->file, 'r');
63        } else {
64            $this->fh = @fopen($this->file, 'rb');
65        }
66
67        if (!$this->fh) {
68            throw new ArchiveIOException('Could not open file for reading: '.$this->file);
69        }
70        $this->closed = false;
71    }
72
73    /**
74     * Read the contents of a TAR archive
75     *
76     * This function lists the files stored in the archive
77     *
78     * The archive is closed afer reading the contents, because rewinding is not possible in bzip2 streams.
79     * Reopen the file with open() again if you want to do additional operations
80     *
81     * @throws ArchiveIOException
82     * @returns FileInfo[]
83     */
84    public function contents()
85    {
86        if ($this->closed || !$this->file) {
87            throw new ArchiveIOException('Can not read from a closed archive');
88        }
89
90        $result = array();
91        while ($read = $this->readbytes(512)) {
92            $header = $this->parseHeader($read);
93            if (!is_array($header)) {
94                continue;
95            }
96
97            $this->skipbytes(ceil($header['size'] / 512) * 512);
98            $result[] = $this->header2fileinfo($header);
99        }
100
101        $this->close();
102        return $result;
103    }
104
105    /**
106     * Extract an existing TAR archive
107     *
108     * The $strip parameter allows you to strip a certain number of path components from the filenames
109     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
110     * an integer is passed as $strip.
111     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
112     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
113     *
114     * By default this will extract all files found in the archive. You can restrict the output using the $include
115     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
116     * $include is set only files that match this expression will be extracted. Files that match the $exclude
117     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
118     * stripped filenames as described above.
119     *
120     * The archive is closed afer reading the contents, because rewinding is not possible in bzip2 streams.
121     * Reopen the file with open() again if you want to do additional operations
122     *
123     * @param string     $outdir  the target directory for extracting
124     * @param int|string $strip   either the number of path components or a fixed prefix to strip
125     * @param string     $exclude a regular expression of files to exclude
126     * @param string     $include a regular expression of files to include
127     * @throws ArchiveIOException
128     * @return FileInfo[]
129     */
130    public function extract($outdir, $strip = '', $exclude = '', $include = '')
131    {
132        if ($this->closed || !$this->file) {
133            throw new ArchiveIOException('Can not read from a closed archive');
134        }
135
136        $outdir = rtrim($outdir, '/');
137        @mkdir($outdir, 0777, true);
138        if (!is_dir($outdir)) {
139            throw new ArchiveIOException("Could not create directory '$outdir'");
140        }
141
142        $extracted = array();
143        while ($dat = $this->readbytes(512)) {
144            // read the file header
145            $header = $this->parseHeader($dat);
146            if (!is_array($header)) {
147                continue;
148            }
149            $fileinfo = $this->header2fileinfo($header);
150
151            // apply strip rules
152            $fileinfo->strip($strip);
153
154            // skip unwanted files
155            if (!strlen($fileinfo->getPath()) || !$fileinfo->match($include, $exclude)) {
156                $this->skipbytes(ceil($header['size'] / 512) * 512);
157                continue;
158            }
159
160            // create output directory
161            $output    = $outdir.'/'.$fileinfo->getPath();
162            $directory = ($fileinfo->getIsdir()) ? $output : dirname($output);
163            @mkdir($directory, 0777, true);
164
165            // extract data
166            if (!$fileinfo->getIsdir()) {
167                $fp = fopen($output, "wb");
168                if (!$fp) {
169                    throw new ArchiveIOException('Could not open file for writing: '.$output);
170                }
171
172                $size = floor($header['size'] / 512);
173                for ($i = 0; $i < $size; $i++) {
174                    fwrite($fp, $this->readbytes(512), 512);
175                }
176                if (($header['size'] % 512) != 0) {
177                    fwrite($fp, $this->readbytes(512), $header['size'] % 512);
178                }
179
180                fclose($fp);
181                touch($output, $fileinfo->getMtime());
182                chmod($output, $fileinfo->getMode());
183            } else {
184                $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
185            }
186
187            $extracted[] = $fileinfo;
188        }
189
190        $this->close();
191        return $extracted;
192    }
193
194    /**
195     * Create a new TAR file
196     *
197     * If $file is empty, the tar file will be created in memory
198     *
199     * @param string $file
200     * @throws ArchiveIOException
201     */
202    public function create($file = '')
203    {
204        $this->file   = $file;
205        $this->memory = '';
206        $this->fh     = 0;
207
208        if ($this->file) {
209            // determine compression
210            if ($this->comptype == Archive::COMPRESS_AUTO) {
211                $this->setCompression($this->complevel, $this->filetype($file));
212            }
213
214            if ($this->comptype === Archive::COMPRESS_GZIP) {
215                $this->fh = @gzopen($this->file, 'wb'.$this->complevel);
216            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
217                $this->fh = @bzopen($this->file, 'w');
218            } else {
219                $this->fh = @fopen($this->file, 'wb');
220            }
221
222            if (!$this->fh) {
223                throw new ArchiveIOException('Could not open file for writing: '.$this->file);
224            }
225        }
226        $this->writeaccess = true;
227        $this->closed      = false;
228    }
229
230    /**
231     * Add a file to the current TAR archive using an existing file in the filesystem
232     *
233     * @param string $file path to the original file
234     * @param string|FileInfo $fileinfo either the name to us in archive (string) or a FileInfo oject with all meta data, empty to take from original
235     * @throws ArchiveCorruptedException when the file changes while reading it, the archive will be corrupt and should be deleted
236     * @throws ArchiveIOException there was trouble reading the given file, it was not added
237     */
238    public function addFile($file, $fileinfo = '')
239    {
240        if (is_string($fileinfo)) {
241            $fileinfo = FileInfo::fromPath($file, $fileinfo);
242        }
243
244        if ($this->closed) {
245            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
246        }
247
248        $fp = fopen($file, 'rb');
249        if (!$fp) {
250            throw new ArchiveIOException('Could not open file for reading: '.$file);
251        }
252
253        // create file header
254        $this->writeFileHeader($fileinfo);
255
256        // write data
257        $read = 0;
258        while (!feof($fp)) {
259            $data = fread($fp, 512);
260            $read += strlen($data);
261            if ($data === false) {
262                break;
263            }
264            if ($data === '') {
265                break;
266            }
267            $packed = pack("a512", $data);
268            $this->writebytes($packed);
269        }
270        fclose($fp);
271
272        if($read != $fileinfo->getSize()) {
273            $this->close();
274            throw new ArchiveCorruptedException("The size of $file changed while reading, archive corrupted. read $read expected ".$fileinfo->getSize());
275        }
276    }
277
278    /**
279     * Add a file to the current TAR archive using the given $data as content
280     *
281     * @param string|FileInfo $fileinfo either the name to us in archive (string) or a FileInfo oject with all meta data
282     * @param string          $data     binary content of the file to add
283     * @throws ArchiveIOException
284     */
285    public function addData($fileinfo, $data)
286    {
287        if (is_string($fileinfo)) {
288            $fileinfo = new FileInfo($fileinfo);
289        }
290
291        if ($this->closed) {
292            throw new ArchiveIOException('Archive has been closed, files can no longer be added');
293        }
294
295        $len = strlen($data);
296        $fileinfo->setSize($len);
297        $this->writeFileHeader($fileinfo);
298
299        for ($s = 0; $s < $len; $s += 512) {
300            $this->writebytes(pack("a512", substr($data, $s, 512)));
301        }
302    }
303
304    /**
305     * Add the closing footer to the archive if in write mode, close all file handles
306     *
307     * After a call to this function no more data can be added to the archive, for
308     * read access no reading is allowed anymore
309     *
310     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
311     * consists of two 512 blocks of zero bytes"
312     *
313     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
314     */
315    public function close()
316    {
317        if ($this->closed) {
318            return;
319        } // we did this already
320
321        // write footer
322        if ($this->writeaccess) {
323            $this->writebytes(pack("a512", ""));
324            $this->writebytes(pack("a512", ""));
325        }
326
327        // close file handles
328        if ($this->file) {
329            if ($this->comptype === Archive::COMPRESS_GZIP) {
330                gzclose($this->fh);
331            } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
332                bzclose($this->fh);
333            } else {
334                fclose($this->fh);
335            }
336
337            $this->file = '';
338            $this->fh   = 0;
339        }
340
341        $this->writeaccess = false;
342        $this->closed      = true;
343    }
344
345    /**
346     * Returns the created in-memory archive data
347     *
348     * This implicitly calls close() on the Archive
349     */
350    public function getArchive()
351    {
352        $this->close();
353
354        if ($this->comptype === Archive::COMPRESS_AUTO) {
355            $this->comptype = Archive::COMPRESS_NONE;
356        }
357
358        if ($this->comptype === Archive::COMPRESS_GZIP) {
359            return gzcompress($this->memory, $this->complevel);
360        }
361        if ($this->comptype === Archive::COMPRESS_BZIP) {
362            return bzcompress($this->memory);
363        }
364        return $this->memory;
365    }
366
367    /**
368     * Save the created in-memory archive data
369     *
370     * Note: It more memory effective to specify the filename in the create() function and
371     * let the library work on the new file directly.
372     *
373     * @param string $file
374     * @throws ArchiveIOException
375     */
376    public function save($file)
377    {
378        if ($this->comptype === Archive::COMPRESS_AUTO) {
379            $this->setCompression($this->complevel, $this->filetype($file));
380        }
381
382        if (!file_put_contents($file, $this->getArchive())) {
383            throw new ArchiveIOException('Could not write to file: '.$file);
384        }
385    }
386
387    /**
388     * Read from the open file pointer
389     *
390     * @param int $length bytes to read
391     * @return string
392     */
393    protected function readbytes($length)
394    {
395        if ($this->comptype === Archive::COMPRESS_GZIP) {
396            return @gzread($this->fh, $length);
397        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
398            return @bzread($this->fh, $length);
399        } else {
400            return @fread($this->fh, $length);
401        }
402    }
403
404    /**
405     * Write to the open filepointer or memory
406     *
407     * @param string $data
408     * @throws ArchiveIOException
409     * @return int number of bytes written
410     */
411    protected function writebytes($data)
412    {
413        if (!$this->file) {
414            $this->memory .= $data;
415            $written = strlen($data);
416        } elseif ($this->comptype === Archive::COMPRESS_GZIP) {
417            $written = @gzwrite($this->fh, $data);
418        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
419            $written = @bzwrite($this->fh, $data);
420        } else {
421            $written = @fwrite($this->fh, $data);
422        }
423        if ($written === false) {
424            throw new ArchiveIOException('Failed to write to archive stream');
425        }
426        return $written;
427    }
428
429    /**
430     * Skip forward in the open file pointer
431     *
432     * This is basically a wrapper around seek() (and a workaround for bzip2)
433     *
434     * @param int $bytes seek to this position
435     */
436    function skipbytes($bytes)
437    {
438        if ($this->comptype === Archive::COMPRESS_GZIP) {
439            @gzseek($this->fh, $bytes, SEEK_CUR);
440        } elseif ($this->comptype === Archive::COMPRESS_BZIP) {
441            // there is no seek in bzip2, we simply read on
442            // bzread allows to read a max of 8kb at once
443            while($bytes) {
444                $toread = min(8192, $bytes);
445                @bzread($this->fh, $toread);
446                $bytes -= $toread;
447            }
448        } else {
449            @fseek($this->fh, $bytes, SEEK_CUR);
450        }
451    }
452
453    /**
454     * Write the given file metat data as header
455     *
456     * @param FileInfo $fileinfo
457     */
458    protected function writeFileHeader(FileInfo $fileinfo)
459    {
460        $this->writeRawFileHeader(
461            $fileinfo->getPath(),
462            $fileinfo->getUid(),
463            $fileinfo->getGid(),
464            $fileinfo->getMode(),
465            $fileinfo->getSize(),
466            $fileinfo->getMtime(),
467            $fileinfo->getIsdir() ? '5' : '0'
468        );
469    }
470
471    /**
472     * Write a file header to the stream
473     *
474     * @param string $name
475     * @param int    $uid
476     * @param int    $gid
477     * @param int    $perm
478     * @param int    $size
479     * @param int    $mtime
480     * @param string $typeflag Set to '5' for directories
481     */
482    protected function writeRawFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '')
483    {
484        // handle filename length restrictions
485        $prefix  = '';
486        $namelen = strlen($name);
487        if ($namelen > 100) {
488            $file = basename($name);
489            $dir  = dirname($name);
490            if (strlen($file) > 100 || strlen($dir) > 155) {
491                // we're still too large, let's use GNU longlink
492                $this->writeRawFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
493                for ($s = 0; $s < $namelen; $s += 512) {
494                    $this->writebytes(pack("a512", substr($name, $s, 512)));
495                }
496                $name = substr($name, 0, 100); // cut off name
497            } else {
498                // we're fine when splitting, use POSIX ustar
499                $prefix = $dir;
500                $name   = $file;
501            }
502        }
503
504        // values are needed in octal
505        $uid   = sprintf("%6s ", decoct($uid));
506        $gid   = sprintf("%6s ", decoct($gid));
507        $perm  = sprintf("%6s ", decoct($perm));
508        $size  = sprintf("%11s ", decoct($size));
509        $mtime = sprintf("%11s", decoct($mtime));
510
511        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
512        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");
513
514        for ($i = 0, $chks = 0; $i < 148; $i++) {
515            $chks += ord($data_first[$i]);
516        }
517
518        for ($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++) {
519            $chks += ord($data_last[$j]);
520        }
521
522        $this->writebytes($data_first);
523
524        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
525        $this->writebytes($chks.$data_last);
526    }
527
528    /**
529     * Decode the given tar file header
530     *
531     * @param string $block a 512 byte block containing the header data
532     * @return array|false returns false when this was a null block
533     * @throws ArchiveCorruptedException
534     */
535    protected function parseHeader($block)
536    {
537        if (!$block || strlen($block) != 512) {
538            throw new ArchiveCorruptedException('Unexpected length of header');
539        }
540
541        // null byte blocks are ignored
542        if(trim($block) === '') return false;
543
544        for ($i = 0, $chks = 0; $i < 148; $i++) {
545            $chks += ord($block[$i]);
546        }
547
548        for ($i = 156, $chks += 256; $i < 512; $i++) {
549            $chks += ord($block[$i]);
550        }
551
552        $header = @unpack(
553            "a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix",
554            $block
555        );
556        if (!$header) {
557            throw new ArchiveCorruptedException('Failed to parse header');
558        }
559
560        $return['checksum'] = OctDec(trim($header['checksum']));
561        if ($return['checksum'] != $chks) {
562            throw new ArchiveCorruptedException('Header does not match it\'s checksum');
563        }
564
565        $return['filename'] = trim($header['filename']);
566        $return['perm']     = OctDec(trim($header['perm']));
567        $return['uid']      = OctDec(trim($header['uid']));
568        $return['gid']      = OctDec(trim($header['gid']));
569        $return['size']     = OctDec(trim($header['size']));
570        $return['mtime']    = OctDec(trim($header['mtime']));
571        $return['typeflag'] = $header['typeflag'];
572        $return['link']     = trim($header['link']);
573        $return['uname']    = trim($header['uname']);
574        $return['gname']    = trim($header['gname']);
575
576        // Handle ustar Posix compliant path prefixes
577        if (trim($header['prefix'])) {
578            $return['filename'] = trim($header['prefix']).'/'.$return['filename'];
579        }
580
581        // Handle Long-Link entries from GNU Tar
582        if ($return['typeflag'] == 'L') {
583            // following data block(s) is the filename
584            $filename = trim($this->readbytes(ceil($return['size'] / 512) * 512));
585            // next block is the real header
586            $block  = $this->readbytes(512);
587            $return = $this->parseHeader($block);
588            // overwrite the filename
589            $return['filename'] = $filename;
590        }
591
592        return $return;
593    }
594
595    /**
596     * Creates a FileInfo object from the given parsed header
597     *
598     * @param $header
599     * @return FileInfo
600     */
601    protected function header2fileinfo($header)
602    {
603        $fileinfo = new FileInfo();
604        $fileinfo->setPath($header['filename']);
605        $fileinfo->setMode($header['perm']);
606        $fileinfo->setUid($header['uid']);
607        $fileinfo->setGid($header['gid']);
608        $fileinfo->setSize($header['size']);
609        $fileinfo->setMtime($header['mtime']);
610        $fileinfo->setOwner($header['uname']);
611        $fileinfo->setGroup($header['gname']);
612        $fileinfo->setIsdir((bool) $header['typeflag']);
613
614        return $fileinfo;
615    }
616
617    /**
618     * Checks if the given compression type is available and throws an exception if not
619     *
620     * @param $comptype
621     * @throws ArchiveIllegalCompressionException
622     */
623    protected function compressioncheck($comptype)
624    {
625        if ($comptype === Archive::COMPRESS_GZIP && !function_exists('gzopen')) {
626            throw new ArchiveIllegalCompressionException('No gzip support available');
627        }
628
629        if ($comptype === Archive::COMPRESS_BZIP && !function_exists('bzopen')) {
630            throw new ArchiveIllegalCompressionException('No bzip2 support available');
631        }
632    }
633
634    /**
635     * Guesses the wanted compression from the given file
636     *
637     * Uses magic bytes for existing files, the file extension otherwise
638     *
639     * You don't need to call this yourself. It's used when you pass Archive::COMPRESS_AUTO somewhere
640     *
641     * @param string $file
642     * @return int
643     */
644    public function filetype($file)
645    {
646        // for existing files, try to read the magic bytes
647        if(file_exists($file) && is_readable($file) && filesize($file) > 5) {
648            $fh = fopen($file, 'rb');
649            if(!$fh) return false;
650            $magic = fread($fh, 5);
651            fclose($fh);
652
653            if(strpos($magic, "\x42\x5a") === 0) return Archive::COMPRESS_BZIP;
654            if(strpos($magic, "\x1f\x8b") === 0) return Archive::COMPRESS_GZIP;
655        }
656
657        // otherwise rely on file name
658        $file = strtolower($file);
659        if (substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
660            return Archive::COMPRESS_GZIP;
661        } elseif (substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
662            return Archive::COMPRESS_BZIP;
663        }
664
665        return Archive::COMPRESS_NONE;
666    }
667}
668