1<?php
2/**
3 * This is a copy of DokuWiki's core tar library
4 *
5 * A copy is used because old DokuWiki installs may have an old version. It has also been adjusted to
6 * print each extracted file
7 *
8 * @author Andreas Gohr <andi@splitbrain.org>
9 * @author Bouchon <tarlib@bouchon.org> (Maxg)
10 * @license GPL 2
11 */
/**
 * Reads, extracts and creates TAR archives (plain, gzip or bzip2 compressed)
 *
 * Reading: open() followed by either contents() or extract(). Both close the archive when done.
 * Writing: create() followed by addFile()/addData() and finally close(), getArchive() or save().
 */
class VerboseTar {

    const COMPRESS_AUTO = 0;
    const COMPRESS_NONE = 1;
    const COMPRESS_GZIP = 2;
    const COMPRESS_BZIP = 3;

    /** @var string archive file as given to open()/create(); empty for in-memory archives and after close() */
    protected $file = '';
    /** @var int one of the COMPRESS_* constants */
    protected $comptype = self::COMPRESS_AUTO;
    /** @var resource|int handle of the opened archive file, 0 when there is none */
    protected $fh;
    /** @var string archive data collected when create() was called without a file name */
    protected $memory = '';
    /** @var bool true while no archive is opened */
    protected $closed = true;
    /** @var bool true while the archive is opened for writing; close() then appends the footer */
    protected $writeaccess = false;

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @param int    $comptype one of the COMPRESS_* constants, COMPRESS_AUTO guesses from the file extension
     * @throws VerboseTarIOException
     * @throws VerboseTarIllegalCompressionException
     */
    public function open($file, $comptype = self::COMPRESS_AUTO) {
        // determine compression
        if($comptype == self::COMPRESS_AUTO) $comptype = $this->filetype($file);
        $this->compressioncheck($comptype);

        $this->comptype = $comptype;
        $this->file     = $file;
        // an earlier create() on this instance must not make close() append a footer to a read handle
        $this->writeaccess = false;

        if($this->comptype === self::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif($this->comptype === self::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if(!$this->fh) throw new VerboseTarIOException('Could not open file for reading: '.$this->file);
        $this->closed = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive, and returns an indexed array of associative
     * arrays containing for each file the following information:
     *
     * checksum    Tar Checksum of the file
     * filename    The full name of the stored file (ustar prefix and GNU long names are resolved)
     * perm        UNIX permissions in DECIMAL, not octal
     * uid         The Owner ID
     * gid         The Group ID
     * size        Uncompressed filesize
     * mtime       Timestamp of last modification
     * typeflag    Empty or '0' for files, set for everything else (e.g. '5' for directories)
     * link        The link name field of the header
     * uname       Owner name
     * gname       Group name
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @throws VerboseTarIOException
     * @return array
     */
    public function contents() {
        if($this->closed || !$this->file) throw new VerboseTarIOException('Can not read from a closed archive');

        $result = array();
        while($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if(!is_array($header)) continue;

            // file data is stored in full 512 byte blocks
            $this->skipbytes((int) (ceil($header['size'] / 512) * 512));
            $result[] = $header;
        }

        $this->close();
        return $result;
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * Every extracted name is reported through admin_plugin_elwikiupgrade::_say().
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string     $outdir  the target directory for extracting
     * @param int|string $strip   either the number of path components or a fixed prefix to strip
     * @param string     $exclude a regular expression of files to exclude
     * @param string     $include a regular expression of files to include
     * @throws VerboseTarIOException
     * @return array the headers of all extracted entries
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '') {
        if($this->closed || !$this->file) throw new VerboseTarIOException('Can not read from a closed archive');

        $outdir = rtrim($outdir, '/');
        io_mkdir_p($outdir);

        // an integer $strip counts path components, anything else is a literal prefix
        $prefix    = is_int($strip) ? '' : (string) $strip;
        $prefixlen = strlen($prefix);

        $extracted = array();

        while($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if(!is_array($header)) continue;

            // bytes occupied by this entry's data, padded to full 512 byte blocks
            $padded = (int) (ceil($header['size'] / 512) * 512);

            if($header['filename'] === '') {
                // nameless entry: its data still has to be passed over to stay in sync
                $this->skipbytes($padded);
                continue;
            }

            // an empty or '0' typeflag marks a regular file, everything else is handled like a directory
            $isdir = (bool) $header['typeflag'];

            // strip prefix
            $filename = $this->cleanPath($header['filename']);
            if(is_int($strip)) {
                // strip this many leading directories, the file name itself is always kept
                $parts = explode('/', $filename);
                $base  = $isdir ? '' : array_pop($parts);
                $parts = array_slice($parts, $strip);
                if($base !== '') $parts[] = $base;
                // joining all parts at once avoids a leading slash when no directory is left
                $filename = implode('/', $parts);
            } elseif($prefixlen && strncmp($filename, $prefix, $prefixlen) === 0) {
                $filename = (string) substr($filename, $prefixlen);
            }

            // check if this should be extracted
            $extract = ($filename !== '');
            if($extract && $include && !preg_match($include, $filename)) $extract = false;
            if($extract && $exclude && preg_match($exclude, $filename)) $extract = false;

            if(!$extract) {
                $this->skipbytes($padded);
                continue;
            }

            // Now do the extraction
            $extracted[] = $header;

            $output    = "$outdir/$filename";
            $directory = $isdir ? $output : dirname($output);
            io_mkdir_p($directory);

            // print status
            admin_plugin_elwikiupgrade::_say(hsc($filename));

            if($isdir) {
                $this->skipbytes($padded); // the size is usually 0 for directories
                continue;
            }

            $fp = fopen($output, "wb");
            if(!$fp) throw new VerboseTarIOException('Could not open file for writing: '.$output);

            // copy block by block, the last block is only written up to the real file size
            $remaining = $header['size'];
            while($remaining > 0) {
                $chunk = $this->readbytes(512);
                if($chunk === false || $chunk === '') break; // archive ends early, nothing more to copy
                fwrite($fp, $chunk, (int) min(512, $remaining));
                $remaining -= 512;
            }

            fclose($fp);
            touch($output, $header['mtime']);
            chmod($output, $header['perm']);
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @param int    $comptype  one of the COMPRESS_* constants, COMPRESS_AUTO guesses from the file extension
     * @param int    $complevel gzip compression level, only used when writing a gzip file
     * @throws VerboseTarIOException
     * @throws VerboseTarIllegalCompressionException
     */
    public function create($file = '', $comptype = self::COMPRESS_AUTO, $complevel = 9) {
        // determine compression
        if($comptype == self::COMPRESS_AUTO) $comptype = $this->filetype($file);
        $this->compressioncheck($comptype);

        $this->comptype = $comptype;
        $this->file     = $file;
        $this->memory   = '';
        $this->fh       = 0;

        if($this->file) {
            if($this->comptype === self::COMPRESS_GZIP) {
                $this->fh = @gzopen($this->file, 'wb'.$complevel);
            } elseif($this->comptype === self::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if(!$this->fh) throw new VerboseTarIOException('Could not open file for writing: '.$this->file);
        }
        $this->writeaccess = true;
        $this->closed      = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @todo handle directory adding
     * @param string $file the original file
     * @param string $name the name to use for the file in the archive, defaults to $file
     * @throws VerboseTarIOException
     */
    public function addFile($file, $name = '') {
        if($this->closed) throw new VerboseTarIOException('Archive has been closed, files can no longer be added');

        if(!$name) $name = $file;
        $name = $this->cleanPath($name);

        $fp = fopen($file, 'rb');
        if(!$fp) throw new VerboseTarIOException('Could not open file for reading: '.$file);

        // create file header and copy all stat info from the original file
        clearstatcache(false, $file);
        $stat = stat($file);
        $this->writeFileHeader(
            $name,
            $stat[4], // uid
            $stat[5], // gid
            fileperms($file),
            filesize($file),
            filemtime($file)
        );

        // copy the content in NUL padded 512 byte blocks
        while(!feof($fp)) {
            $data = fread($fp, 512);
            if($data === false || $data === '') break;
            $this->writebytes(pack("a512", $data));
        }
        fclose($fp);
    }

    /**
     * Add a file to the current TAR archive using the given $data as content
     *
     * @param string $name
     * @param string $data
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $mtime modification time, the current time is used when 0
     * @throws VerboseTarIOException
     */
    public function addData($name, $data, $uid = 0, $gid = 0, $perm = 0666, $mtime = 0) {
        if($this->closed) throw new VerboseTarIOException('Archive has been closed, files can no longer be added');

        $name = $this->cleanPath($name);
        $len  = strlen($data);

        $this->writeFileHeader(
            $name,
            $uid,
            $gid,
            $perm,
            $len,
            ($mtime) ? $mtime : time()
        );

        // write the content in NUL padded 512 byte blocks
        for($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     * @throws VerboseTarIOException when the footer can not be written
     */
    public function close() {
        if($this->closed) return; // we did this already

        // write footer
        if($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles
        if($this->file) {
            if($this->comptype === self::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif($this->comptype === self::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh   = 0;
        }

        // the footer is written exactly once, a later open() starts in read mode
        $this->writeaccess = false;
        $this->closed      = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive
     *
     * @param int $comptype  compression of the returned data, COMPRESS_AUTO uses the type given to create()
     * @param int $complevel gzip compression level
     * @throws VerboseTarIOException
     * @throws VerboseTarIllegalCompressionException
     * @return string
     */
    public function getArchive($comptype = self::COMPRESS_AUTO, $complevel = 9) {
        $this->close();

        if($comptype === self::COMPRESS_AUTO) $comptype = $this->comptype;
        $this->compressioncheck($comptype);

        // gzencode() produces the gzip file format, gzcompress() would only give a bare zlib stream
        if($comptype === self::COMPRESS_GZIP) return gzencode($this->memory, $complevel);
        if($comptype === self::COMPRESS_BZIP) return bzcompress($this->memory);
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory effective to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @param int    $comptype  COMPRESS_AUTO guesses from the extension of $file
     * @param int    $complevel
     * @throws VerboseTarIOException
     * @throws VerboseTarIllegalCompressionException
     */
    public function save($file, $comptype = self::COMPRESS_AUTO, $complevel = 9) {
        if($comptype === self::COMPRESS_AUTO) $comptype = $this->filetype($file);

        if(!file_put_contents($file, $this->getArchive($comptype, $complevel))) {
            throw new VerboseTarIOException('Could not write to file: '.$file);
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string|bool the data as returned by the underlying read function
     */
    protected function readbytes($length) {
        if($this->comptype === self::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif($this->comptype === self::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws VerboseTarIOException
     * @return int number of bytes written
     */
    protected function writebytes($data) {
        if(!$this->file) {
            $this->memory .= $data;
            $written = strlen($data);
        } elseif($this->comptype === self::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif($this->comptype === self::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if($written === false) throw new VerboseTarIOException('Failed to write to archive stream');
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip from the current position
     */
    public function skipbytes($bytes) {
        $bytes = (int) $bytes; // callers pass the float result of ceil()

        if($this->comptype === self::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif($this->comptype === self::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on
            // bzread() returns at most 8192 bytes per call, so larger gaps need several reads
            while($bytes > 0) {
                $skipped = @bzread($this->fh, min(8192, $bytes));
                if($skipped === false || $skipped === '') break; // end of stream or read error
                $bytes -= strlen($skipped);
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write a file header
     *
     * Names longer than 100 bytes are either split into ustar prefix and name or, when that does
     * not fit either, announced by a preceding GNU '././@LongLink' entry.
     *
     * @param string $name
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $size
     * @param int    $mtime
     * @param string $typeflag Set to '5' for directories
     * @throws VerboseTarIOException
     */
    protected function writeFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '') {
        // handle filename length restrictions
        $prefix  = '';
        $namelen = strlen($name);
        if($namelen > 100) {
            $file = basename($name);
            $dir  = dirname($name);
            if(strlen($file) > 100 || strlen($dir) > 155) {
                // we're still too large, let's use GNU longlink
                $this->writeFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
                for($s = 0; $s < $namelen; $s += 512) {
                    $this->writebytes(pack("a512", substr($name, $s, 512)));
                }
                $name = substr($name, 0, 100); // cut off name
            } else {
                // we're fine when splitting, use POSIX ustar
                $prefix = $dir;
                $name   = $file;
            }
        }

        // values are needed in octal
        $uid   = sprintf("%6s ", decoct($uid));
        $gid   = sprintf("%6s ", decoct($gid));
        $perm  = sprintf("%6s ", decoct($perm));
        $size  = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));

        // the header is built in two parts, the 8 byte checksum field goes in between
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");

        // checksum: sum of all header bytes with the checksum field counted as eight spaces (8 * 32 = 256)
        $chks = 256;
        for($i = 0; $i < 148; $i++) {
            $chks += ord($data_first[$i]);
        }
        for($j = 0; $j < 356; $j++) {
            $chks += ord($data_last[$j]);
        }

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * For GNU long name entries the following name block(s) and the real header are read from
     * the archive as well, the returned header is the real one with the long name filled in.
     *
     * @param string $block a 512 byte block containing the header data
     * @return array|bool the decoded header, false if the block is no valid header
     */
    protected function parseHeader($block) {
        if(!$block || strlen($block) != 512) return false;

        // checksum: sum of all header bytes with the checksum field (148-155) counted as eight spaces
        $chks = 256;
        for($i = 0; $i < 148; $i++) {
            $chks += ord($block[$i]);
        }
        for($i = 156; $i < 512; $i++) {
            $chks += ord($block[$i]);
        }

        $header = @unpack("a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix", $block);
        if(!$header) return false;

        $return = array();
        $return['checksum'] = octdec(trim($header['checksum']));
        if($return['checksum'] != $chks) return false;

        $return['filename'] = trim($header['filename']);
        $return['perm']     = octdec(trim($header['perm']));
        $return['uid']      = octdec(trim($header['uid']));
        $return['gid']      = octdec(trim($header['gid']));
        $return['size']     = octdec(trim($header['size']));
        $return['mtime']    = octdec(trim($header['mtime']));
        // unpack's "a" keeps the NUL byte since PHP 5.5, an unset typeflag has to come out empty
        $return['typeflag'] = rtrim($header['typeflag'], "\0");
        $return['link']     = trim($header['link']);
        $return['uname']    = trim($header['uname']);
        $return['gname']    = trim($header['gname']);

        // Handle ustar Posix compliant path prefixes
        if(trim($header['prefix'])) $return['filename'] = trim($header['prefix']).'/'.$return['filename'];

        // Handle Long-Link entries from GNU Tar
        if($return['typeflag'] === 'L') {
            // following data block(s) is the filename, sized by the decoded (not the raw octal) size
            $filename = trim($this->readbytes((int) (ceil($return['size'] / 512) * 512)));
            // next block is the real header
            $block  = $this->readbytes(512);
            $return = $this->parseHeader($block);
            if(!is_array($return)) return false; // no valid header follows the long name
            // overwrite the filename
            $return['filename'] = $filename;
        }

        return $return;
    }

    /**
     * Cleans up a path and removes relative parts, also strips leading slashes
     *
     * Empty and '.' components are dropped, '..' removes the component before it. A '..' with
     * nothing before it is dropped, so the result never points above its starting directory.
     *
     * @param string $path
     * @return string
     */
    public function cleanPath($path) {
        $path    = explode('/', $path);
        $newpath = array();
        foreach($path as $p) {
            if($p === '' || $p === '.') continue;
            if($p === '..') {
                array_pop($newpath);
                continue;
            }
            array_push($newpath, $p);
        }
        return trim(implode('/', $newpath), '/');
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype
     * @throws VerboseTarIllegalCompressionException
     */
    protected function compressioncheck($comptype) {
        if($comptype === self::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw new VerboseTarIllegalCompressionException('No gzip support available');
        }

        if($comptype === self::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw new VerboseTarIllegalCompressionException('No bzip2 support available');
        }
    }

    /**
     * Guesses the wanted compression from the given filename extension
     *
     * You don't need to call this yourself. It's used when you pass self::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int one of COMPRESS_GZIP, COMPRESS_BZIP or COMPRESS_NONE
     */
    public function filetype($file) {
        $file = strtolower($file);
        if(substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            $comptype = self::COMPRESS_GZIP;
        } elseif(substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            $comptype = self::COMPRESS_BZIP;
        } else {
            $comptype = self::COMPRESS_NONE;
        }
        return $comptype;
    }
}
594
/**
 * Signals that an archive or one of its files could not be opened, read or written
 */
class VerboseTarIOException extends Exception
{
}
597
/**
 * Signals that the requested compression type is not supported by this PHP installation
 */
class VerboseTarIllegalCompressionException extends Exception
{
}
600