xref: /dokuwiki/inc/io.php (revision 6beb5edc7f6912312807b6135ef730e133666d77)
1<?php
2
3/**
4 * File IO functions
5 *
6 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
7 * @author     Andreas Gohr <andi@splitbrain.org>
8 */
9
10use dokuwiki\Logger;
11use dokuwiki\Utf8\PhpString;
12use dokuwiki\HTTP\DokuHTTPClient;
13use dokuwiki\Extension\Event;
14
/**
 * Removes empty directories
 *
 * Starting at the namespace of the given id, each namespace directory is removed
 * in turn, walking towards the root, until one cannot be removed.
 *
 * Sends IO_NAMESPACE_DELETED events for 'pages' and 'media' namespaces.
 * Event data:
 * $data[0]    ns: The colon separated namespace path minus the trailing page name.
 * $data[1]    ns_type: 'pages' or 'media' namespace tree.
 *
 * For any other $basedir the directories are still removed, but no event is sent.
 *
 * @param string $id - a pageid, the namespace of that id will be tried to be deleted
 * @param string $basedir - the config name of the type to delete (datadir or mediadir usually)
 * @return bool - true if at least one namespace was deleted
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function io_sweepNS($id, $basedir = 'datadir')
{
    global $conf;
    $types = ['datadir' => 'pages', 'mediadir' => 'media'];
    $ns_type = ($types[$basedir] ?? false);

    $delone = false;

    // walk from the innermost namespace up to the root
    while (($id = getNS($id)) !== false) {
        $dir = $conf[$basedir] . '/' . utf8_encodeFN(str_replace(':', '/', $id));

        // stop at the first directory that cannot be removed
        if (!@rmdir($dir)) {
            return $delone;
        }

        // a directory was removed, regardless of whether an event exists for this tree
        $delone = true;

        if ($ns_type !== false) {
            $data = [$id, $ns_type];
            Event::createAndTrigger('IO_NAMESPACE_DELETED', $data);
        }
    }
    return $delone;
}
55
/**
 * Reads a DokuWiki page from file while sending the IO_WIKIPAGE_READ event.
 *
 * The event's default action delegates to io_readFile() (with cleaning enabled).
 * Action plugins are allowed to modify the page content in transit.
 * The file path should not be changed.
 *
 * Event data:
 * $data[0]    The raw arguments for io_readFile as an array.
 * $data[1]    ns: The colon separated namespace path minus the trailing page name. (false if root ns)
 * $data[2]    page_name: The wiki page name.
 * $data[3]    rev: The page revision, false for current wiki pages.
 *
 * @param string $file filename
 * @param string $id page id
 * @param bool|int|string $rev revision timestamp
 * @return string
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function io_readWikiPage($file, $id, $rev = false)
{
    // every "empty" revision value (0, '', null, ...) means the current page
    $rev = empty($rev) ? false : $rev;

    $readArgs = [$file, true];
    $data = [$readArgs, getNS($id), noNS($id), $rev];

    return Event::createAndTrigger('IO_WIKIPAGE_READ', $data, '_io_readWikiPage_action', false);
}
84
/**
 * Callback adapter for io_readFile().
 *
 * Expects the IO_WIKIPAGE_READ event data, whose first element holds exactly the
 * two arguments for io_readFile(). Anything else is treated as a callback error.
 *
 * @param array $data event data
 * @return string the file content, or an empty string on malformed event data
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function _io_readWikiPage_action($data)
{
    $wellFormed = is_array($data) && is_array($data[0]) && count($data[0]) === 2;
    if (!$wellFormed) {
        return ''; //callback error
    }

    return io_readFile(...$data[0]);
}
101
/**
 * Returns content of $file as cleaned string.
 *
 * Uses gzip if extension is .gz
 * and bz2 if extension is .bz2
 *
 * A file that does not exist is handled like an empty file, not as an error.
 *
 * If you want to use the returned value in unserialize
 * be sure to set $clean to false!
 *
 * @param string $file filename
 * @param bool $clean pass the content through cleanText() before returning it
 * @return string|bool the file contents or false on error
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function io_readFile($file, $clean = true)
{
    $content = '';

    if (file_exists($file)) {
        if (str_ends_with($file, '.gz')) {
            if (!DOKU_HAS_GZIP) return false;
            $content = gzfile_get_contents($file);
        } elseif (str_ends_with($file, '.bz2')) {
            if (!DOKU_HAS_BZIP) return false;
            $content = bzfile($file);
        } else {
            $content = file_get_contents($file);
        }
    }

    // any reader signalling failure ends here, nothing is cleaned
    if ($content === false || $content === null) return false;

    return $clean ? cleanText($content) : $content;
}
139
/**
 * Returns the content of a .gz compressed file as string
 *
 * This reads the file in chunks and decompresses using inflate_* functions
 * rather than gzfile(). This is necessary because PHP's zlib stream wrapper
 * has a bug (php/php-src#21376) in PHP 8.5.3+ where gzfile() fails to detect
 * corrupt gzip data and returns garbage instead of an error.
 *
 * Handles concatenated gzip streams as created by gzopen() in append mode.
 *
 * @param string $file filename
 * @return string|false content or false on error
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function gzfile_get_contents($file)
{
    $fh = @fopen($file, 'rb');
    if ($fh === false) return false;

    $ret = '';
    $leftover = ''; // bytes read from the file that belong to the next gzip stream
    while ($leftover !== '' || !feof($fh)) {
        // each gzip stream needs its own inflate context
        $ctx = inflate_init(ZLIB_ENCODING_GZIP);
        if ($ctx === false) {
            fclose($fh);
            return false;
        }

        // decompress one gzip stream
        while (true) {
            if ($leftover !== '') {
                $chunk = $leftover;
                $leftover = '';
            } else {
                $chunk = fread($fh, 8192);
                if ($chunk === '' || $chunk === false) break;
            }
            $readBefore = inflate_get_read_len($ctx);
            $decoded = @inflate_add($ctx, $chunk);
            if ($decoded === false) {
                fclose($fh);
                return false;
            }
            $ret .= $decoded;
            if (inflate_get_status($ctx) === ZLIB_STREAM_END) {
                // whatever this stream did not consume starts the next stream
                $consumed = inflate_get_read_len($ctx) - $readBefore;
                $leftover = substr($chunk, $consumed);
                break;
            }
        }
    }
    fclose($fh);
    return $ret;
}
191
/**
 * Returns the content of a .bz2 compressed file as string
 *
 * With $array set, the content is returned as a list of lines instead, each line
 * keeping its trailing newline (a final line without newline is included as is).
 *
 * @param string $file filename
 * @param bool $array return array of lines
 * @return string|array|bool content or false on error
 *
 * @author marcel senf <marcel@rucksackreinigung.de>
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function bzfile($file, $array = false)
{
    $bz = bzopen($file, "r");
    if ($bz === false) return false;

    $lines = [];
    $str = '';
    while (!feof($bz)) {
        //8192 seems to be the maximum buffersize?
        $buffer = bzread($bz, 8192);
        if (($buffer === false) || (bzerrno($bz) !== 0)) {
            // do not leave the handle open when bailing out
            bzclose($bz);
            return false;
        }
        $str .= $buffer;
        if ($array) {
            // move every complete line out of the buffer, keep the unfinished rest
            $pos = strpos($str, "\n");
            while ($pos !== false) {
                $lines[] = substr($str, 0, $pos + 1);
                $str = substr($str, $pos + 1);
                $pos = strpos($str, "\n");
            }
        }
    }
    bzclose($bz);
    if ($array) {
        if ($str !== '') {
            $lines[] = $str;
        }
        return $lines;
    }
    return $str;
}
236
/**
 * Writes a DokuWiki page to file while sending the IO_WIKIPAGE_WRITE event.
 *
 * The event's default action delegates to io_saveFile().
 * Action plugins are allowed to modify the page content in transit.
 * The file path should not be changed.
 * (The append parameter is set to false.)
 *
 * For current pages (no revision) the namespace directories are created first.
 *
 * Event data:
 * $data[0]    The raw arguments for io_saveFile as an array.
 * $data[1]    ns: The colon separated namespace path minus the trailing page name. (false if root ns)
 * $data[2]    page_name: The wiki page name.
 * $data[3]    rev: The page revision, false for current wiki pages.
 *
 * @param string $file filename
 * @param string $content
 * @param string $id page id
 * @param int|bool|string $rev timestamp of revision
 * @return bool
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function io_writeWikiPage($file, $content, $id, $rev = false)
{
    // every "empty" revision value (0, '', null, ...) means the current page
    $rev = empty($rev) ? false : $rev;

    if ($rev === false) {
        io_createNamespace($id); // create namespaces as needed
    }

    $saveArgs = [$file, $content, false];
    $data = [$saveArgs, getNS($id), noNS($id), $rev];

    return Event::createAndTrigger('IO_WIKIPAGE_WRITE', $data, '_io_writeWikiPage_action', false);
}
270
/**
 * Callback adapter for io_saveFile().
 *
 * Expects the IO_WIKIPAGE_WRITE event data, whose first element holds exactly the
 * three arguments for io_saveFile(). Anything else is treated as a callback error.
 *
 * @param array $data event data
 * @return bool
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function _io_writeWikiPage_action($data)
{
    if (!is_array($data) || !is_array($data[0]) || count($data[0]) !== 3) {
        return false; //callback error
    }

    $ok = io_saveFile(...$data[0]);

    // for attic files make sure the file has the mtime of the revision
    $isSavedRevision = $ok && is_int($data[3]) && $data[3] > 0;
    if ($isSavedRevision) {
        @touch($data[0][0], $data[3]);
    }

    return $ok;
}
292
/**
 * Internal function to save contents to a file.
 *
 * Uses gzip if extension is .gz and bz2 if extension is .bz2. Appending to a
 * .bz2 file is done by reading the existing content and rewriting the whole file.
 *
 * A newly created file gets the permissions configured in $conf['fperm'].
 *
 * No locking and no directory creation is done here.
 *
 * @param string $file filename path to file
 * @param string $content
 * @param bool $append
 * @return bool true on success, otherwise false (including failed or incomplete writes)
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function _io_saveFile($file, $content, $append)
{
    global $conf;
    $mode = ($append) ? 'ab' : 'wb';
    $fileexists = file_exists($file);

    if (str_ends_with($file, '.gz')) {
        if (!DOKU_HAS_GZIP) return false;
        $fh = @gzopen($file, $mode . '9');
        if (!$fh) return false;
        $written = gzwrite($fh, $content);
        gzclose($fh);
    } elseif (str_ends_with($file, '.bz2')) {
        if (!DOKU_HAS_BZIP) return false;
        if ($append) {
            // bz2 has no append mode here: prepend the existing content and rewrite
            $bzcontent = bzfile($file);
            if ($bzcontent === false) return false;
            $content = $bzcontent . $content;
        }
        $fh = @bzopen($file, 'w');
        if (!$fh) return false;
        $written = bzwrite($fh, $content);
        bzclose($fh);
    } else {
        $fh = @fopen($file, $mode);
        if (!$fh) return false;
        $written = fwrite($fh, $content);
        fclose($fh);
    }

    if (!$fileexists && $conf['fperm']) {
        chmod($file, $conf['fperm']);
    }

    // a failed or short write must not be reported as success
    return $written !== false && $written >= strlen((string)$content);
}
338
/**
 * Saves $content to $file.
 *
 * The directory for the file is created when missing and the write is
 * wrapped in io_lock()/io_unlock(). A failure is reported through msg().
 *
 * If the third parameter is set to true the given content
 * will be appended.
 *
 * Uses gzip if extension is .gz
 * and bz2 if extension is .bz2
 *
 * @param string $file filename path to file
 * @param string $content
 * @param bool $append
 * @return bool true on success, otherwise false
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function io_saveFile($file, $content, $append = false)
{
    io_makeFileDir($file);
    io_lock($file);

    $saved = _io_saveFile($file, $content, $append);
    if (!$saved) {
        msg("Writing $file failed", -1);
    }

    // the lock is released on both outcomes
    io_unlock($file);
    return (bool)$saved;
}
367
/**
 * Replace one or more occurrences of a line in a file.
 *
 * The default, when $maxlines is 0 is to delete all matching lines then append a single line.
 * A regex that matches any part of the line will remove the entire line in this mode.
 * Captures in $newline are not available.
 *
 * Otherwise each line is matched and replaced individually, up to the first $maxlines lines
 * or all lines if $maxlines is -1. If $regex is true then captures can be used in $newline.
 *
 * Be sure to include the trailing newline in $oldline when replacing entire lines.
 *
 * A file that does not exist counts as success. A file that ends up without any
 * lines is deleted.
 *
 * Uses gzip if extension is .gz
 * and bz2 if extension is .bz2
 *
 * @param string $file filename
 * @param string $oldline exact linematch to remove
 * @param string $newline new line to insert
 * @param bool $regex use regexp?
 * @param int $maxlines number of occurrences of the line to replace
 * @return bool true on success
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 * @author Christopher Smith <chris@jalakai.co.uk>
 * @author Patrick Brown <ptbrown@whoopdedo.org>
 */
function io_replaceInFile($file, $oldline, $newline, $regex = false, $maxlines = 0)
{
    if ((string)$oldline === '') {
        Logger::error('io_replaceInFile() $oldline parameter cannot be empty');
        return false;
    }

    if (!file_exists($file)) return true;

    // check for compression support before taking the lock, so that bailing out
    // never leaves a lock behind
    $isGzip = str_ends_with($file, '.gz');
    $isBzip = !$isGzip && str_ends_with($file, '.bz2');
    if ($isGzip && !DOKU_HAS_GZIP) return false;
    if ($isBzip && !DOKU_HAS_BZIP) return false;

    io_lock($file);

    // load into array
    if ($isGzip) {
        $lines = gzfile($file);
    } elseif ($isBzip) {
        $lines = bzfile($file, true);
    } else {
        $lines = file($file);
    }

    // the file could not be read, there is nothing to work on
    if (!is_array($lines)) {
        msg("Removing content from $file failed", -1);
        io_unlock($file);
        return false;
    }

    // make non-regexes into regexes
    $pattern = $regex ? $oldline : '/^' . preg_quote($oldline, '/') . '$/';
    $replace = $regex ? $newline : addcslashes($newline, '\$');

    // remove matching lines
    if ($maxlines > 0) {
        $count = 0;
        $matched = 0;
        foreach ($lines as $i => $line) {
            if ($count >= $maxlines) break;
            // $matched will be set to 0|1 depending on whether pattern is matched and line replaced
            $lines[$i] = preg_replace($pattern, $replace, $line, -1, $matched);
            if ($matched) {
                $count++;
            }
        }
    } elseif ($maxlines == 0) {
        $lines = preg_grep($pattern, $lines, PREG_GREP_INVERT);
        if ((string)$newline !== '') {
            $lines[] = $newline;
        }
    } else {
        $lines = preg_replace($pattern, $replace, $lines);
    }

    if (count($lines)) {
        if (!_io_saveFile($file, implode('', $lines), false)) {
            msg("Removing content from $file failed", -1);
            io_unlock($file);
            return false;
        }
    } else {
        @unlink($file);
    }

    io_unlock($file);
    return true;
}
454
/**
 * Delete lines that match $badline from $file.
 *
 * This is io_replaceInFile() with an empty replacement and $maxlines 0.
 *
 * Be sure to include the trailing newline in $badline
 *
 * @param string $file filename
 * @param string $badline exact linematch to remove
 * @param bool $regex use regexp?
 * @return bool true on success
 *
 * @author Patrick Brown <ptbrown@whoopdedo.org>
 */
function io_deleteFromFile($file, $badline, $regex = false)
{
    $replacement = '';   // nothing is put back in place of the removed lines
    $allMatches = 0;     // $maxlines 0: remove every matching line

    return io_replaceInFile($file, $badline, $replacement, $regex, $allMatches);
}
471
/**
 * Tries to lock a file
 *
 * The lock is a directory inside $conf['lockdir'] named after the md5 hash of
 * the file name. It is used by io_saveFile() and io_replaceInFile().
 *
 * It waits maximal 3 seconds for the lock, after this time
 * the lock is assumed to be stale and the function goes on
 *
 * User aborts are ignored from here on; io_unlock() switches that back.
 *
 * @param string $file filename
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function io_lock($file)
{
    global $conf;

    $lockDir = $conf['lockdir'] . '/' . md5($file);
    @ignore_user_abort(1);

    $started = time();
    //waited longer than 3 seconds? -> stale lock
    while ((time() - $started) <= 3) {
        if (@mkdir($lockDir)) {
            // creating the directory succeeded, the lock is ours
            if ($conf['dperm']) {
                chmod($lockDir, $conf['dperm']);
            }
            return;
        }
        usleep(50);
    }
}
506
/**
 * Unlocks a file
 *
 * Removes the lock directory created by io_lock() for this file and
 * stops ignoring user aborts.
 *
 * @param string $file filename
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function io_unlock($file)
{
    global $conf;

    // same naming scheme as in io_lock()
    @rmdir($conf['lockdir'] . '/' . md5($file));
    @ignore_user_abort(0);
}
522
/**
 * Create missing namespace directories and send the IO_NAMESPACE_CREATED events
 * in the order of directory creation. (Parent directories first.)
 *
 * Event data:
 * $data[0]    ns: The colon separated namespace path minus the trailing page name.
 * $data[1]    ns_type: 'pages' or 'media' namespace tree.
 *
 * An unknown $ns_type raises an error via trigger_error() and nothing is created.
 *
 * @param string $id page id
 * @param string $ns_type 'pages' or 'media'
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 */
function io_createNamespace($id, $ns_type = 'pages')
{
    // each namespace type has its own id-to-filename function
    $resolvers = ['pages' => 'wikiFN', 'media' => 'mediaFN'];
    if (!isset($resolvers[$ns_type])) {
        trigger_error('Bad $ns_type parameter for io_createNamespace().');
        return;
    }
    $toFile = $resolvers[$ns_type];

    // collect the namespaces whose directories are missing, innermost first
    $file = $toFile($id);
    $dir = dirname($file);
    $parts = explode(':', $id);
    $created = [];
    while (!@is_dir($dir) && (!file_exists($dir) || is_dir($dir))) {
        array_pop($parts);
        $ns = implode(':', $parts);
        if ($ns === '') {
            break;
        }
        $created[] = $ns;
        $dir = dirname($toFile($ns));
    }

    // make directories
    io_makeFileDir($file);

    // send the events, outermost namespace first
    foreach (array_reverse($created) as $ns) {
        $data = [$ns, $ns_type];
        Event::createAndTrigger('IO_NAMESPACE_CREATED', $data);
    }
}
567
/**
 * Create the directory needed for the given file
 *
 * Nothing happens when the directory already exists. A failure to create
 * it is reported through msg(); there is no return value.
 *
 * @param string $file file name
 *
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function io_makeFileDir($file)
{
    $dir = dirname($file);
    if (@is_dir($dir)) {
        return;
    }

    if (!io_mkdir_p($dir)) {
        msg("Creating directory $dir failed", -1);
    }
}
584
/**
 * Creates a directory hierarchy.
 *
 * Missing parent directories are created first (recursively). Each newly
 * created directory gets the permissions from $conf['dperm'] when that is set.
 *
 * @param string $target path of the directory to create
 * @return bool true when the directory exists afterwards or $target is empty
 *
 * @link    http://php.net/manual/en/function.mkdir.php
 * @author  <saint@corenova.com>
 * @author  Andreas Gohr <andi@splitbrain.org>
 */
function io_mkdir_p($target)
{
    global $conf;

    // best case check first
    if (@is_dir($target) || empty($target)) {
        return true;
    }
    // something that is not a directory is in the way
    if (file_exists($target) && !is_dir($target)) {
        return false;
    }

    // make sure the parent exists before creating this level
    $parent = substr($target, 0, strrpos($target, '/'));
    if (!io_mkdir_p($parent)) {
        return false;
    }

    $created = @mkdir($target);
    if ($created && !empty($conf['dperm'])) {
        chmod($target, $conf['dperm']);
    }
    return $created;
}
611/**
612 * Recursively delete a directory
613 *
614 * @param string $path
615 * @param bool $removefiles defaults to false which will delete empty directories only
616 * @return bool
617 *
618 * @author Andreas Gohr <andi@splitbrain.org>
619 */
620function io_rmdir($path, $removefiles = false)
621{
622    if (!is_string($path) || $path == "") return false;
623    if (!file_exists($path)) return true; // it's already gone or was never there, count as success
624
625    if (is_dir($path) && !is_link($path)) {
626        $dirs = [];
627        $files = [];
628        if (!$dh = @opendir($path)) return false;
629        while (false !== ($f = readdir($dh))) {
630            if ($f == '..' || $f == '.') continue;
631
632            // collect dirs and files first
633            if (is_dir("$path/$f") && !is_link("$path/$f")) {
634                $dirs[] = "$path/$f";
635            } elseif ($removefiles) {
636                $files[] = "$path/$f";
637            } else {
638                return false; // abort when non empty
639            }
640        }
641        closedir($dh);
642        // now traverse into  directories first
643        foreach ($dirs as $dir) {
644            if (!io_rmdir($dir, $removefiles)) return false; // abort on any error
645        }
646        // now delete files
647        foreach ($files as $file) {
648            if (!@unlink($file)) return false; //abort on any error
649        }
650        // remove self
651        return @rmdir($path);
652    } elseif ($removefiles) {
653        return @unlink($path);
654    }
655    return false;
656}
657
/**
 * Creates a unique temporary directory and returns
 * its path.
 *
 * The directory is created below $conf['tmpdir'] with a random md5 hash as name.
 *
 * @return false|string path to new directory or false
 * @throws Exception
 *
 * @author Michael Klier <chi@chimeric.de>
 */
function io_mktmpdir()
{
    global $conf;

    $name = md5(uniqid(random_int(0, mt_getrandmax()), true));
    $tmpdir = $conf['tmpdir'] . '/' . $name;

    return io_mkdir_p($tmpdir) ? $tmpdir : false;
}
681
/**
 * downloads a file from the net and saves it
 *
 * if $useAttachment is false,
 * - $file is the full filename to save the file, incl. path
 * - if successful will return true, false otherwise
 *
 * if $useAttachment is true,
 * - $file is the directory where the file should be saved; the name is appended
 *   to it as is, so it has to end with the directory separator
 * - the name is taken from a quoted filename in the Content-Disposition header,
 *   falling back to $defaultName
 * - if successful will return the name used for the saved file, false otherwise
 *
 * @param string $url url to download
 * @param string $file path to file or directory where to save
 * @param bool $useAttachment true: try to use name of download, uses otherwise $defaultName
 *                            false: uses $file as path to file
 * @param string $defaultName fallback for if using $useAttachment
 * @param int $maxSize maximum file size
 * @return bool|string          if failed false, otherwise true or the name of the file in the given dir
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Chris Smith <chris@jalakai.co.uk>
 */
function io_download($url, $file, $useAttachment = false, $defaultName = '', $maxSize = 2_097_152)
{
    global $conf;

    $http = new DokuHTTPClient();
    $http->max_bodysize = $maxSize;
    $http->timeout = 25; //max. 25 sec
    $http->keep_alive = false; // we do single ops here, no need for keep-alive

    $data = $http->get($url);
    if (!$data) return false;

    $name = '';
    if ($useAttachment) {
        // prefer the file name announced by the server
        $disposition = $http->resp_headers['content-disposition'] ?? null;
        $match = [];
        if (
            is_string($disposition) &&
            preg_match('/attachment;\s*filename\s*=\s*"([^"]*)"/i', $disposition, $match)
        ) {
            $name = PhpString::basename($match[1]);
        }

        // no usable name announced, fall back to the given default
        if (!$name) {
            if (!$defaultName) return false;
            $name = $defaultName;
        }

        $file .= $name;
    }

    $existedBefore = file_exists($file);
    $fp = @fopen($file, "w");
    if (!$fp) return false;
    fwrite($fp, $data);
    fclose($fp);

    // only newly created files get the configured permissions
    if (!$existedBefore && $conf['fperm']) {
        chmod($file, $conf['fperm']);
    }

    return $useAttachment ? $name : true;
}
747
/**
 * Windows compatible rename
 *
 * rename() can not overwrite existing files on Windows
 * this function will use copy/unlink instead
 *
 * The copy/unlink fallback is only tried when rename() fails. The copy gets
 * the permissions from $conf['fperm'] when that is set.
 *
 * @param string $from
 * @param string $to
 * @return bool success or fail
 */
function io_rename($from, $to)
{
    global $conf;

    if (@rename($from, $to)) {
        return true;
    }

    // plain rename did not work, try copying instead
    if (!@copy($from, $to)) {
        return false;
    }

    if ($conf['fperm']) {
        chmod($to, $conf['fperm']);
    }
    @unlink($from);
    return true;
}
773
/**
 * Runs an external command with input and output pipes.
 * Returns the exit code from the process.
 *
 * The complete $input is written to the command's stdin before its stdout is
 * read. Whatever the command writes to stderr is discarded.
 *
 * @param string $cmd
 * @param string $input data written to the command's stdin
 * @param string $output receives everything the command wrote to stdout
 * @return int exit code from process, -1 when the process could not be started
 *
 * @author Tom N Harris <tnharris@whoopdedo.org>
 */
function io_exec($cmd, $input, &$output)
{
    $descriptors = [
        0 => ["pipe", "r"],
        1 => ["pipe", "w"],
        2 => ["pipe", "w"]
    ];

    $process = proc_open($cmd, $descriptors, $pipes);
    if (!$process) {
        return -1;
    }
    [$stdin, $stdout, $stderr] = $pipes;

    fclose($stderr); // ignore stderr

    fwrite($stdin, $input);
    fclose($stdin);

    $output = stream_get_contents($stdout);
    fclose($stdout);

    return proc_close($process);
}
802/**
803 * Search a file for matching lines
804 *
805 * This is probably not faster than file()+preg_grep() but less
806 * memory intensive because not the whole file needs to be loaded
807 * at once.
808 *
809 * @param string $file The file to search
810 * @param string $pattern PCRE pattern
811 * @param int $max How many lines to return (0 for all)
812 * @param bool $backref When true returns array with backreferences instead of lines
813 * @return array matching lines or backref, false on error
814 *
815 * @author Andreas Gohr <andi@splitbrain.org>
816 */
817function io_grep($file, $pattern, $max = 0, $backref = false)
818{
819    $fh = @fopen($file, 'r');
820    if (!$fh) return false;
821    $matches = [];
822
823    $cnt = 0;
824    $line = '';
825    while (!feof($fh)) {
826        $line .= fgets($fh, 4096);  // read full line
827        if (!str_ends_with($line, "\n")) continue;
828
829        // check if line matches
830        if (preg_match($pattern, $line, $match)) {
831            if ($backref) {
832                $matches[] = $match;
833            } else {
834                $matches[] = $line;
835            }
836            $cnt++;
837        }
838        if ($max && $max == $cnt) break;
839        $line = '';
840    }
841    fclose($fh);
842    return $matches;
843}
844
845
/**
 * Get size of contents of a file, for a compressed file the uncompressed size
 * Warning: reading uncompressed size of content of bz-files requires uncompressing
 *
 * For .gz files the size is taken from the last four bytes of the file without
 * decompressing anything.
 *
 * @param string $file filename path to file
 * @return int size of file, 0 when the file is missing or the size can not be determined
 *
 * @author  Gerrit Uitslag <klapinklapin@gmail.com>
 */
function io_getSizeFile($file)
{
    if (!file_exists($file)) return 0;

    if (str_ends_with($file, '.gz')) {
        $fp = @fopen($file, "rb");
        if ($fp === false) return 0;
        // the size field is only there when the file has at least four bytes
        $buffer = '';
        if (fseek($fp, -4, SEEK_END) === 0) {
            $buffer = fread($fp, 4);
        }
        fclose($fp);
        if (!is_string($buffer) || strlen($buffer) !== 4) return 0;
        $array = unpack("V", $buffer);
        $uncompressedsize = end($array);
    } elseif (str_ends_with($file, '.bz2')) {
        if (!DOKU_HAS_BZIP) return 0;
        $bz = bzopen($file, "r");
        if ($bz === false) return 0;
        $uncompressedsize = 0;
        while (!feof($bz)) {
            //8192 seems to be the maximum buffersize?
            $buffer = bzread($bz, 8192);
            if (($buffer === false) || (bzerrno($bz) !== 0)) {
                bzclose($bz);
                return 0;
            }
            $uncompressedsize += strlen($buffer);
        }
        bzclose($bz);
    } else {
        $uncompressedsize = filesize($file);
    }

    return $uncompressedsize;
}
886