xref: /plugin/sitebackup/admin.php (revision c874c2c0a464f5afa1b4711c21d732aad51b8080)
1<?php
2if (!defined('DOKU_INC')) die();
3
4/**
5 * Site Backup admin plugin for DokuWiki.
6 *
7 * Streams a tar.gz of selected wiki parts (pages, media, conf, lib/plugins, lib/tpl)
8 * to the admin's browser. The archive is built in data/tmp/ with a random filename,
9 * streamed out, and deleted immediately. Nothing persists on the server.
10 *
11 * Security model:
12 *  - Admin-only: DokuWiki's AdminPlugin framework enforces auth_isadmin() before
13 *    handle()/html() are invoked because forAdminOnly() returns true. A second
14 *    explicit check inside streamArchive() guards against any framework bypass.
15 *  - The temp archive lives in $conf['tmpdir'] (data/tmp/), which DokuWiki ships
16 *    with a deny-all .htaccess; it cannot be fetched directly even if the path
17 *    were known.
18 *  - Filename uses 64 bits of CSPRNG randomness, file is chmod'd to 0600, and is
19 *    deleted both at the natural end of streamArchive() and via a shutdown
20 *    function in case the connection is aborted partway.
21 *  - Stale temp files from previous runs (older than 1 hour) are swept on each
22 *    invocation, so even a crash-during-stream leaves nothing for long.
23 *
24 * Treat downloaded archives as credentials: they may include conf/users.auth.php
25 * (password hashes), ACL rules, and any secrets stored in conf/local.php.
26 */
27
28use dokuwiki\Extension\AdminPlugin;
29use dokuwiki\Form\Form;
30use splitbrain\PHPArchive\Archive;
31use splitbrain\PHPArchive\ArchiveIOException;
32
33// PatchedTar fixes splitbrain/php-archive PR #38 (mtime bug) for the version
34// of the library vendored with DokuWiki Librarian.
35// The class lives in PatchedTar.php and is autoloaded via DokuWiki's PSR-4 loader
36// (dokuwiki\plugin\sitebackup namespace -> lib/plugins/sitebackup/).
37use dokuwiki\plugin\sitebackup\PatchedTar as Tar;
38
39class admin_plugin_sitebackup extends AdminPlugin
40{
41    /** Prefix used for the temp archive filename in data/tmp/. */
42    const TMP_PREFIX = 'sitebackup_tmp_';
43
44    /** Max age (seconds) of leftover temp files before sweep removes them. */
45    const TMP_STALE_AGE = 3600;
46
47    /** @var array list of [absolute path, archive-relative path, size] of files to include */
48    protected $fileList = [];
49
50    /** @var int total uncompressed size of selected files */
51    protected $totalBytes = 0;
52
53    /**
54     * Tracks real paths already added to the archive to prevent double-inclusion
55     * via symlinks pointing to the same file.
56     *
57     * @var array<string, true>
58     */
59    protected $visitedPaths = [];
60
61    /**
62     * @return bool
63     */
64    public function forAdminOnly(): bool
65    {
66        return true;
67    }
68
69    /**
70     * @return int
71     */
72    public function getMenuSort(): int
73    {
74        return 1000;
75    }
76
77    /**
78     * Dispatch based on the submitted action.
79     * Valid actions: "preview" (build file list, render summary table),
80     *                "download" (build archive, stream as tar.gz).
81     */
82    public function handle(): void
83    {
84        global $INPUT;
85
86        // Sweep stale temp files from previous runs every time we enter the page.
87        $this->sweepStaleTempFiles();
88
89        if (!$INPUT->has('sitebackup_action')) return;
90        if (!checkSecurityToken()) return;
91
92        $action = $INPUT->str('sitebackup_action');
93        if ($action !== 'preview' && $action !== 'download') return;
94
95        // Download MUST be POST. Refuse GET / HEAD / etc. so a stray link, browser
96        // prefetch, or curious co-admin pasting a URL can't trigger a backup.
97        if ($action === 'download' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
98            msg($this->getLang('err_post'), -1);
99            return;
100        }
101
102        $this->collectFiles();
103
104        if ($action === 'download') {
105            $this->streamArchive();
106            // streamArchive() exits on success. If it returns, an error was shown
107            // via msg() and we fall through to html() so the user sees the form.
108        }
109    }
110
111    /**
112     * Render the admin page: intro, form, and (if $fileList is populated) preview table.
113     */
114    public function html(): void
115    {
116        echo '<h1>' . hsc($this->getLang('menu')) . '</h1>';
117        echo '<p>' . $this->getLang('intro') . '</p>';
118        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
119            . '<strong>' . hsc($this->getLang('warn_title')) . '</strong> '
120            . $this->getLang('warn_body')
121            . '</p>';
122
123        $this->renderForm();
124
125        if ($this->fileList) {
126            $this->renderPreview();
127        }
128    }
129
130    /* ----------------------------------------------------------------- *
131     *  Form
132     * ----------------------------------------------------------------- */
133
134    /**
135     * Render the selection form with checkboxes for each backup section.
136     */
137    protected function renderForm(): void
138    {
139        global $INPUT;
140
141        $hasSubmitted = $INPUT->has('sitebackup_action');
142        $defaults = [
143            'pages'       => true,
144            'media'       => true,
145            'meta'        => true,
146            'media_meta'  => true,
147            'attic'       => false,
148            'media_attic' => false,
149            'index'       => false,
150            'conf'        => true,
151            'plugins'     => true,
152            'tpl'         => true,
153        ];
154        $sel = [];
155        foreach ($defaults as $k => $def) {
156            $sel[$k] = $hasSubmitted ? $INPUT->bool('sb_' . $k, false) : $def;
157        }
158
159        $form = new Form(['method' => 'POST', 'id' => 'sitebackup_form']);
160        $form->setHiddenField('do', 'admin');
161        $form->setHiddenField('page', 'sitebackup');
162
163        $style = 'text-align: left; padding: 0 1em .5em 1em; margin: 1em 0;';
164
165        $form->addFieldsetOpen($this->getLang('fs_content'))->attr('style', $style);
166        $this->addCheckboxRow($form, 'sb_pages',       $this->getLang('opt_pages'),       $sel['pages']);
167        $this->addCheckboxRow($form, 'sb_media',       $this->getLang('opt_media'),       $sel['media']);
168        $this->addCheckboxRow($form, 'sb_meta',        $this->getLang('opt_meta'),        $sel['meta']);
169        $this->addCheckboxRow($form, 'sb_media_meta',  $this->getLang('opt_media_meta'),  $sel['media_meta']);
170        $this->addCheckboxRow($form, 'sb_attic',       $this->getLang('opt_attic'),       $sel['attic']);
171        $this->addCheckboxRow($form, 'sb_media_attic', $this->getLang('opt_media_attic'), $sel['media_attic']);
172        $this->addCheckboxRow($form, 'sb_index',       $this->getLang('opt_index'),       $sel['index']);
173        $form->addFieldsetClose();
174
175        $form->addFieldsetOpen($this->getLang('fs_code'))->attr('style', $style);
176        $this->addCheckboxRow($form, 'sb_conf',    $this->getLang('opt_conf'),    $sel['conf']);
177        $this->addCheckboxRow($form, 'sb_plugins', $this->getLang('opt_plugins'), $sel['plugins']);
178        $this->addCheckboxRow($form, 'sb_tpl',     $this->getLang('opt_tpl'),     $sel['tpl']);
179        $form->addFieldsetClose();
180
181        $form->addTagOpen('p');
182        $form->addButton('sitebackup_action', $this->getLang('btn_preview'))->val('preview');
183        $form->addHTML(' &nbsp;&nbsp; ');
184        $form->addButton('sitebackup_action', $this->getLang('btn_download'))->val('download');
185        $form->addTagClose('p');
186
187        echo $form->toHTML();
188    }
189
190    /**
191     * Add a labelled checkbox row to the form.
192     *
193     * @param Form   $form
194     * @param string $name    field name
195     * @param string $label   display label
196     * @param bool   $checked whether the checkbox is pre-checked
197     */
198    protected function addCheckboxRow(Form $form, string $name, string $label, bool $checked): void
199    {
200        $form->addTagOpen('div')->attr('style', 'margin:.4em 0;');
201        $cb = $form->addCheckbox($name, ' ' . $label);
202        $cb->val('1');
203        if ($checked) $cb->attr('checked', 'checked');
204        $form->addTagClose('div');
205    }
206
207    /* ----------------------------------------------------------------- *
208     *  File collection
209     * ----------------------------------------------------------------- */
210
211    /**
212     * Build $this->fileList from the selected checkboxes in the current request.
213     */
214    protected function collectFiles(): void
215    {
216        global $INPUT, $conf;
217
218        $this->fileList     = [];
219        $this->totalBytes   = 0;
220        $this->visitedPaths = [];
221
222        // Use $conf[...] for the data dirs so relocated savedir installs still work.
223        $roots = [
224            'sb_pages'       => [$conf['datadir'],        'data/pages'],
225            'sb_media'       => [$conf['mediadir'],       'data/media'],
226            'sb_meta'        => [$conf['metadir'],        'data/meta'],
227            'sb_media_meta'  => [$conf['mediametadir'],   'data/media_meta'],
228            'sb_attic'       => [$conf['olddir'],         'data/attic'],
229            'sb_media_attic' => [$conf['mediaolddir'],    'data/media_attic'],
230            'sb_index'       => [$conf['indexdir'],       'data/index'],
231            'sb_conf'        => [rtrim(DOKU_CONF, '/'),   'conf'],
232            'sb_plugins'     => [rtrim(DOKU_PLUGIN, '/'), 'lib/plugins'],
233            'sb_tpl'         => [DOKU_INC . 'lib/tpl',    'lib/tpl'],
234        ];
235
236        foreach ($roots as $field => $pair) {
237            if (!$INPUT->bool($field, false)) continue;
238            [$srcAbs, $archiveRel] = $pair;
239            $this->walkInto($srcAbs, $archiveRel);
240        }
241    }
242
243    /**
244     * Recursively enumerate all readable files under $srcAbs and append them to $this->fileList.
245     *
246     * @param string $srcAbs     absolute filesystem path (file or directory)
247     * @param string $archiveRel path prefix to use inside the archive
248     */
249    protected function walkInto(string $srcAbs, string $archiveRel): void
250    {
251        if (!file_exists($srcAbs)) return;
252
253        if (is_file($srcAbs)) {
254            $this->appendFile($srcAbs, $archiveRel);
255            return;
256        }
257
258        try {
259            $it = new RecursiveIteratorIterator(
260                new RecursiveDirectoryIterator(
261                    $srcAbs,
262                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
263                ),
264                RecursiveIteratorIterator::LEAVES_ONLY
265            );
266        } catch (Exception $e) {
267            return;
268        }
269
270        $srcRoot = rtrim($srcAbs, '/');
271        $rootLen = strlen($srcRoot) + 1;
272        foreach ($it as $info) {
273            try {
274                if (!$info->isFile() || !$info->isReadable()) continue;
275
276                // Skip files already included via a different symlink path.
277                $realPath = $info->getRealPath();
278                if ($realPath === false) continue;
279                if (isset($this->visitedPaths[$realPath])) continue;
280                $this->visitedPaths[$realPath] = true;
281
282                $abs = $info->getPathname();
283                $rel = str_replace('\\', '/', substr($abs, $rootLen));
284
285                if ($this->isIgnored($archiveRel, $rel)) continue;
286
287                $this->appendFile($abs, $archiveRel . '/' . $rel);
288            } catch (Exception $e) {
289                continue;
290            }
291        }
292    }
293
294    /**
295     * Return true if a file should be excluded from the archive.
296     * Hardcoded (no config) to keep the plugin small.
297     *
298     * @param string $archiveRel top-level archive branch, e.g. "conf" or "lib/plugins"
299     * @param string $rel        path within that branch
300     * @return bool
301     */
302    protected function isIgnored(string $archiveRel, string $rel): bool
303    {
304        $base = basename($rel);
305
306        // Universal noise.
307        if ($base === '_dummy') return true;
308        if ($base === '.DS_Store') return true;
309        if ($base === 'Thumbs.db') return true;
310
311        // Belt-and-suspenders: never include our own scratch files even if
312        // someone pointed savedir at an unusual location.
313        if (str_starts_with($base, self::TMP_PREFIX)) return true;
314
315        // Skip VCS metadata anywhere in any branch. Local clones / checkouts
316        // can be huge and aren't part of "live" state.
317        $segments = explode('/', $rel);
318        foreach ($segments as $seg) {
319            if ($seg === '.git') return true;
320            if ($seg === '.svn') return true;
321            if ($seg === '.hg') return true;
322        }
323
324        // conf/ branch: drop *.dist / *.example / *.bak sample files. They're
325        // shipped with DokuWiki and templates, not real configuration.
326        if ($archiveRel === 'conf') {
327            if (preg_match('/\.(dist|example|bak)$/i', $base)) return true;
328        }
329
330        return false;
331    }
332
333    /**
334     * Append a single file entry to the file list.
335     *
336     * @param string $abs        absolute filesystem path
337     * @param string $archiveRel path inside the archive
338     */
339    protected function appendFile(string $abs, string $archiveRel): void
340    {
341        $size = filesize($abs);
342        if ($size === false) $size = 0;
343        $this->fileList[] = [$abs, $archiveRel, $size];
344        $this->totalBytes += $size;
345    }
346
347    /* ----------------------------------------------------------------- *
348     *  Preview
349     * ----------------------------------------------------------------- */
350
351    /**
352     * Render a summary table grouping files by top-level archive section.
353     */
354    protected function renderPreview(): void
355    {
356        echo '<h2>' . hsc($this->getLang('preview_head')) . '</h2>';
357        echo '<p>' . sprintf(
358            $this->getLang('preview_summary'),
359            count($this->fileList),
360            hsc($this->humanBytes($this->totalBytes))
361        ) . '</p>';
362
363        $perRoot = [];
364        foreach ($this->fileList as [$abs, $rel, $size]) {
365            $parts = explode('/', $rel, 4);
366            $top = isset($parts[1]) ? ($parts[0] . '/' . $parts[1]) : $parts[0];
367            if (!isset($perRoot[$top])) $perRoot[$top] = ['count' => 0, 'bytes' => 0];
368            $perRoot[$top]['count']++;
369            $perRoot[$top]['bytes'] += $size;
370        }
371        ksort($perRoot);
372
373        echo '<table class="inline"><thead><tr>'
374            . '<th>' . hsc($this->getLang('col_section')) . '</th>'
375            . '<th style="text-align:right;">' . hsc($this->getLang('col_files')) . '</th>'
376            . '<th style="text-align:right;">' . hsc($this->getLang('col_size')) . '</th>'
377            . '</tr></thead><tbody>';
378        foreach ($perRoot as $section => $stats) {
379            echo '<tr><td><code>' . hsc($section) . '</code></td>'
380                . '<td style="text-align:right;">' . (int)$stats['count'] . '</td>'
381                . '<td style="text-align:right;">' . hsc($this->humanBytes($stats['bytes'])) . '</td></tr>';
382        }
383        echo '</tbody></table>';
384        echo '<p>' . $this->getLang('preview_hint') . '</p>';
385    }
386
387    /**
388     * Format a byte count as a human-readable string (B, KiB, MiB, GiB, TiB).
389     *
390     * @param int $bytes
391     * @return string
392     */
393    protected function humanBytes(int $bytes): string
394    {
395        $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
396        $i = 0;
397        $n = (float)$bytes;
398        while ($n >= 1024 && $i < count($units) - 1) {
399            $n /= 1024;
400            $i++;
401        }
402        return sprintf($i === 0 ? '%d %s' : '%.2f %s', $n, $units[$i]);
403    }
404
405    /* ----------------------------------------------------------------- *
406     *  Archive creation + streaming
407     * ----------------------------------------------------------------- */
408
409    /**
410     * Build the archive in data/tmp/, stream it to the browser as a tar.gz download,
411     * and exit. Returns without exiting only when an error prevents streaming, so the
412     * caller can fall through to html() and display the form again.
413     */
414    protected function streamArchive(): void
415    {
416        global $conf, $INPUT;
417
418        // Defense-in-depth: AdminPlugin framework should have blocked non-admins
419        // before we got here, but verify directly anyway.
420        if (!auth_isadmin()) {
421            msg($this->getLang('err_admin'), -1);
422            return;
423        }
424
425        if (!$this->fileList) {
426            msg($this->getLang('err_empty'), -1);
427            return;
428        }
429
430        set_time_limit(0);
431        ignore_user_abort(true);
432
433        // Only raise the memory limit, never lower it.
434        $rawLimit = ini_get('memory_limit');
435        $unit     = strtolower(substr($rawLimit, -1));
436        $limitVal = (int)$rawLimit;
437        switch ($unit) {
438            case 'g': $limitBytes = $limitVal * 1073741824; break;
439            case 'm': $limitBytes = $limitVal * 1048576;    break;
440            case 'k': $limitBytes = $limitVal * 1024;       break;
441            default:  $limitBytes = $limitVal;              break;
442        }
443        if ($limitBytes !== -1 && $limitBytes < 268435456) {
444            ini_set('memory_limit', '256M');
445        }
446
447        $tmpDir = $conf['tmpdir'];
448        if (!is_dir($tmpDir) || !is_writable($tmpDir)) {
449            msg(sprintf($this->getLang('err_tmp'), hsc($tmpDir)), -1);
450            return;
451        }
452
453        // Build a hard-to-guess filename. 16 hex chars = 64 bits of entropy from
454        // a CSPRNG. The file also lives under data/.htaccess deny-all so even a
455        // guess wouldn't be enough.
456        $host = $INPUT->server->str('HTTP_HOST', 'wiki');
457        $host = preg_replace('/[^a-zA-Z0-9._-]+/', '_', $host);
458        $stamp = date('Ymd-His');
459        $archiveDir = $host . '-backup-' . $stamp;             // dir inside the tar
460        $downloadName = $archiveDir . '.tar.gz';               // browser filename
461        $tmpFile = $tmpDir . '/' . self::TMP_PREFIX . bin2hex(random_bytes(8)) . '.tar.gz';
462
463        // Guarantee the temp file is deleted even on connection abort, fatal
464        // error, or `exit` from within the streaming loop.
465        register_shutdown_function(function () use ($tmpFile) {
466            if (is_file($tmpFile)) unlink($tmpFile);
467        });
468
469        $oldUmask = umask(0077);
470
471        try {
472            $tar = new Tar();
473            $tar->setCompression(6, Archive::COMPRESS_GZIP);
474            $tar->create($tmpFile);
475
476            // Belt-and-suspenders: explicitly chmod once created, in case the
477            // umask wasn't honored (some filesystems / wrappers ignore it).
478            chmod($tmpFile, 0600);
479
480            foreach ($this->fileList as [$abs, $rel, $size]) {
481                try {
482                    $tar->addFile($abs, $archiveDir . '/' . $rel);
483                } catch (Exception $e) {
484                    // Skip individual broken files rather than failing the whole backup.
485                    continue;
486                }
487            }
488            $tar->close();
489        } catch (ArchiveIOException $e) {
490            umask($oldUmask);
491            if (is_file($tmpFile)) unlink($tmpFile);
492            msg(sprintf($this->getLang('err_create'), hsc($e->getMessage())), -1);
493            return;
494        }
495
496        umask($oldUmask);
497
498        if (!is_file($tmpFile) || filesize($tmpFile) === 0) {
499            if (is_file($tmpFile)) unlink($tmpFile);
500            msg($this->getLang('err_archive'), -1);
501            return;
502        }
503
504        $size = filesize($tmpFile);
505
506        // Clear any output buffering DokuWiki / extensions may have started so
507        // headers + binary body go out cleanly.
508        while (ob_get_level() > 0) {
509            ob_end_clean();
510        }
511
512        header('Content-Type: application/gzip');
513        header('Content-Disposition: attachment; filename="' . $downloadName . '"');
514        header('Content-Length: ' . $size);
515        header('Cache-Control: no-store, no-cache, must-revalidate, private');
516        header('Pragma: no-cache');
517        header('X-Content-Type-Options: nosniff');
518
519        $fp = fopen($tmpFile, 'rb');
520        if ($fp) {
521            while (!feof($fp)) {
522                $chunk = fread($fp, 1024 * 256);
523                if ($chunk === false) break;
524                echo $chunk;
525                flush();
526            }
527            fclose($fp);
528        }
529        unlink($tmpFile);
530        exit;
531    }
532
533    /**
534     * Remove leftover temp archives from prior runs that died before unlink.
535     * Anything matching our prefix older than TMP_STALE_AGE is fair game.
536     */
537    protected function sweepStaleTempFiles(): void
538    {
539        global $conf;
540        $tmpDir = $conf['tmpdir'] ?? null;
541        if (!$tmpDir || !is_dir($tmpDir)) return;
542
543        $cutoff = time() - self::TMP_STALE_AGE;
544        $pattern = $tmpDir . '/' . self::TMP_PREFIX . '*';
545        foreach ((array) glob($pattern) as $stale) {
546            if (!is_file($stale)) continue;
547            $mtime = filemtime($stale);
548            if ($mtime !== false && $mtime < $cutoff) {
549                unlink($stale);
550            }
551        }
552    }
553}
554