xref: /plugin/sitebackup/admin.php (revision b484d5bce5caf8c82fc4f2172009f85da67a1001)
1<?php
2if (!defined('DOKU_INC')) die();
3
4/**
5 * Site Backup admin plugin for DokuWiki.
6 *
7 * Streams a tar.gz of selected wiki parts (pages, media, conf, lib/plugins, lib/tpl)
8 * to the admin's browser. The archive is built in data/tmp/ with a random filename,
9 * streamed out, and deleted immediately. Nothing persists on the server.
10 *
11 * Security model:
12 *  - Admin-only: DokuWiki's AdminPlugin framework enforces auth_isadmin() before
13 *    handle()/html() are invoked because forAdminOnly() returns true. A second
14 *    explicit check inside streamArchive() guards against any framework bypass.
15 *  - The temp archive lives in $conf['tmpdir'] (data/tmp/), which DokuWiki ships
16 *    with a deny-all .htaccess; it cannot be fetched directly even if the path
17 *    were known.
18 *  - Filename uses 64 bits of CSPRNG randomness, file is chmod'd to 0600, and is
19 *    deleted both at the natural end of streamArchive() and via a shutdown
20 *    function in case the connection is aborted partway.
21 *  - Stale temp files from previous runs (older than 1 hour) are swept on each
22 *    invocation, so even a crash-during-stream leaves nothing for long.
23 *
24 * Treat downloaded archives as credentials: they may include conf/users.auth.php
25 * (password hashes), ACL rules, and any secrets stored in conf/local.php.
26 */
27
28use dokuwiki\Extension\AdminPlugin;
29use dokuwiki\Form\Form;
30use splitbrain\PHPArchive\Archive;
31use splitbrain\PHPArchive\ArchiveIOException;
32
33// PatchedTar fixes splitbrain/php-archive PR #38 (mtime bug) for the version
34// of the library vendored with DokuWiki Librarian.
35require_once __DIR__ . '/PatchedTar.php';
36use dokuwiki\plugin\sitebackup\PatchedTar as Tar;
37
38class admin_plugin_sitebackup extends AdminPlugin
39{
40    /** Prefix used for the temp archive filename in data/tmp/. */
41    const TMP_PREFIX = 'sitebackup_tmp_';
42
43    /** Max age (seconds) of leftover temp files before sweep removes them. */
44    const TMP_STALE_AGE = 3600;
45
46    /** @var array list of [absolute path, archive-relative path, size] of files to include */
47    protected $fileList = [];
48
49    /** @var int total uncompressed size of selected files */
50    protected $totalBytes = 0;
51
52    /**
53     * @return bool
54     */
55    public function forAdminOnly(): bool
56    {
57        return true;
58    }
59
60    /**
61     * @return int
62     */
63    public function getMenuSort(): int
64    {
65        return 1000;
66    }
67
68    /**
69     * Dispatch based on the submitted action.
70     * Valid actions: "preview" (build file list, render summary table),
71     *                "download" (build archive, stream as tar.gz).
72     */
73    public function handle(): void
74    {
75        global $INPUT;
76
77        // Sweep stale temp files from previous runs every time we enter the page.
78        $this->sweepStaleTempFiles();
79
80        if (!$INPUT->has('sitebackup_action')) return;
81        if (!checkSecurityToken()) return;
82
83        $action = $INPUT->str('sitebackup_action');
84        if ($action !== 'preview' && $action !== 'download') return;
85
86        // Download MUST be POST. Refuse GET / HEAD / etc. so a stray link, browser
87        // prefetch, or curious co-admin pasting a URL can't trigger a backup.
88        if ($action === 'download' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
89            msg('Site Backup: download must be submitted via POST.', -1);
90            return;
91        }
92
93        $this->collectFiles();
94
95        if ($action === 'download') {
96            $this->streamArchive();
97            // streamArchive() exits on success. If it returns, an error was shown
98            // via msg() and we fall through to html() so the user sees the form.
99        }
100    }
101
102    /**
103     * Render the admin page: intro, form, and (if $fileList is populated) preview table.
104     */
105    public function html(): void
106    {
107        echo '<h1>Site Backup</h1>';
108        echo '<p>Select what to include, click <em>Preview</em> to see the file list and total size, '
109            . 'then <em>Download tar.gz</em> to receive the archive in your browser.</p>';
110        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
111            . '<strong>Sensitive content warning.</strong> The archive may contain password hashes '
112            . '(<code>conf/users.auth.php</code>), ACL rules, and any secrets stored in '
113            . '<code>conf/local.php</code> (DB credentials, SMTP passwords, API keys). '
114            . 'Treat the download like a credential.'
115            . '</p>';
116
117        $this->renderForm();
118
119        if ($this->fileList) {
120            $this->renderPreview();
121        }
122    }
123
124    /* ----------------------------------------------------------------- *
125     *  Form
126     * ----------------------------------------------------------------- */
127
128    /**
129     * Render the selection form with checkboxes for each backup section.
130     */
131    protected function renderForm(): void
132    {
133        global $INPUT;
134
135        $hasSubmitted = $INPUT->has('sitebackup_action');
136        $defaults = [
137            'pages'       => true,
138            'media'       => true,
139            'meta'        => true,
140            'media_meta'  => true,
141            'attic'       => false,
142            'media_attic' => false,
143            'index'       => false,
144            'conf'        => true,
145            'plugins'     => true,
146            'tpl'         => true,
147        ];
148        $sel = [];
149        foreach ($defaults as $k => $def) {
150            $sel[$k] = $hasSubmitted ? $INPUT->bool('sb_' . $k, false) : $def;
151        }
152
153        $form = new Form(['method' => 'POST', 'id' => 'sitebackup_form']);
154        $form->setHiddenField('do', 'admin');
155        $form->setHiddenField('page', 'sitebackup');
156
157        $style = 'text-align: left; padding: 0 1em .5em 1em; margin: 1em 0;';
158
159        $form->addFieldsetOpen('Wiki content')->attr('style', $style);
160        $this->addCheckboxRow($form, 'sb_pages',       'Pages (data/pages)',                          $sel['pages']);
161        $this->addCheckboxRow($form, 'sb_media',       'Media files (data/media)',                    $sel['media']);
162        $this->addCheckboxRow($form, 'sb_meta',        'Page metadata (data/meta)',                   $sel['meta']);
163        $this->addCheckboxRow($form, 'sb_media_meta',  'Media metadata (data/media_meta)',            $sel['media_meta']);
164        $this->addCheckboxRow($form, 'sb_attic',       'Page revisions (data/attic) - can be large',  $sel['attic']);
165        $this->addCheckboxRow($form, 'sb_media_attic', 'Media revisions (data/media_attic)',          $sel['media_attic']);
166        $this->addCheckboxRow($form, 'sb_index',       'Search index (data/index) - rebuildable',     $sel['index']);
167        $form->addFieldsetClose();
168
169        $form->addFieldsetOpen('Configuration & code')->attr('style', $style);
170        $this->addCheckboxRow($form, 'sb_conf',    'Configuration (conf/) - includes secrets',  $sel['conf']);
171        $this->addCheckboxRow($form, 'sb_plugins', 'Plugins source (lib/plugins/)',             $sel['plugins']);
172        $this->addCheckboxRow($form, 'sb_tpl',     'Templates source (lib/tpl/)',               $sel['tpl']);
173        $form->addFieldsetClose();
174
175        $form->addTagOpen('p');
176        $form->addButton('sitebackup_action', 'Preview')->val('preview');
177        $form->addHTML(' &nbsp;&nbsp; ');
178        $form->addButton('sitebackup_action', 'Download tar.gz')->val('download');
179        $form->addTagClose('p');
180
181        echo $form->toHTML();
182    }
183
184    /**
185     * Add a labelled checkbox row to the form.
186     *
187     * @param Form   $form
188     * @param string $name    field name
189     * @param string $label   display label
190     * @param bool   $checked whether the checkbox is pre-checked
191     */
192    protected function addCheckboxRow(Form $form, string $name, string $label, bool $checked): void
193    {
194        $form->addTagOpen('div')->attr('style', 'margin:.4em 0;');
195        $cb = $form->addCheckbox($name, ' ' . $label);
196        $cb->val('1');
197        if ($checked) $cb->attr('checked', 'checked');
198        $form->addTagClose('div');
199    }
200
201    /* ----------------------------------------------------------------- *
202     *  File collection
203     * ----------------------------------------------------------------- */
204
205    /**
206     * Build $this->fileList from the selected checkboxes in the current request.
207     */
208    protected function collectFiles(): void
209    {
210        global $INPUT, $conf;
211
212        // Use $conf[...] for the data dirs so relocated savedir installs still work.
213        $roots = [
214            'sb_pages'       => [$conf['datadir'],        'data/pages'],
215            'sb_media'       => [$conf['mediadir'],       'data/media'],
216            'sb_meta'        => [$conf['metadir'],        'data/meta'],
217            'sb_media_meta'  => [$conf['mediametadir'],   'data/media_meta'],
218            'sb_attic'       => [$conf['olddir'],         'data/attic'],
219            'sb_media_attic' => [$conf['mediaolddir'],    'data/media_attic'],
220            'sb_index'       => [$conf['indexdir'],       'data/index'],
221            'sb_conf'        => [rtrim(DOKU_CONF, '/'),   'conf'],
222            'sb_plugins'     => [rtrim(DOKU_PLUGIN, '/'), 'lib/plugins'],
223            'sb_tpl'         => [DOKU_INC . 'lib/tpl',    'lib/tpl'],
224        ];
225
226        foreach ($roots as $field => $pair) {
227            if (!$INPUT->bool($field, false)) continue;
228            [$srcAbs, $archiveRel] = $pair;
229            $this->walkInto($srcAbs, $archiveRel);
230        }
231    }
232
233    /**
234     * Recursively enumerate all readable files under $srcAbs and append them to $this->fileList.
235     *
236     * @param string $srcAbs     absolute filesystem path (file or directory)
237     * @param string $archiveRel path prefix to use inside the archive
238     */
239    protected function walkInto(string $srcAbs, string $archiveRel): void
240    {
241        if (!file_exists($srcAbs)) return;
242
243        if (is_file($srcAbs)) {
244            $this->appendFile($srcAbs, $archiveRel);
245            return;
246        }
247
248        try {
249            $it = new RecursiveIteratorIterator(
250                new RecursiveDirectoryIterator(
251                    $srcAbs,
252                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
253                ),
254                RecursiveIteratorIterator::LEAVES_ONLY
255            );
256        } catch (Exception $e) {
257            return;
258        }
259
260        $srcRoot = rtrim($srcAbs, '/');
261        $rootLen = strlen($srcRoot) + 1;
262        foreach ($it as $info) {
263            try {
264                if (!$info->isFile() || !$info->isReadable()) continue;
265                $abs = $info->getPathname();
266                $rel = str_replace('\\', '/', substr($abs, $rootLen));
267
268                if ($this->isIgnored($archiveRel, $rel)) continue;
269
270                $this->appendFile($abs, $archiveRel . '/' . $rel);
271            } catch (Exception $e) {
272                continue;
273            }
274        }
275    }
276
277    /**
278     * Return true if a file should be excluded from the archive.
279     * Hardcoded (no config) to keep the plugin small.
280     *
281     * @param string $archiveRel top-level archive branch, e.g. "conf" or "lib/plugins"
282     * @param string $rel        path within that branch
283     * @return bool
284     */
285    protected function isIgnored(string $archiveRel, string $rel): bool
286    {
287        $base = basename($rel);
288
289        // Universal noise.
290        if ($base === '_dummy') return true;
291        if ($base === '.DS_Store') return true;
292        if ($base === 'Thumbs.db') return true;
293
294        // Belt-and-suspenders: never include our own scratch files even if
295        // someone pointed savedir at an unusual location.
296        if (str_starts_with($base, self::TMP_PREFIX)) return true;
297
298        // Skip VCS metadata anywhere in any branch. Local clones / checkouts
299        // can be huge and aren't part of "live" state.
300        $segments = explode('/', $rel);
301        foreach ($segments as $seg) {
302            if ($seg === '.git') return true;
303            if ($seg === '.svn') return true;
304            if ($seg === '.hg') return true;
305        }
306
307        // conf/ branch: drop *.dist / *.example / *.bak sample files. They're
308        // shipped with DokuWiki and templates, not real configuration.
309        if ($archiveRel === 'conf') {
310            if (preg_match('/\.(dist|example|bak)$/i', $base)) return true;
311        }
312
313        return false;
314    }
315
316    /**
317     * Append a single file entry to the file list.
318     *
319     * @param string $abs        absolute filesystem path
320     * @param string $archiveRel path inside the archive
321     */
322    protected function appendFile(string $abs, string $archiveRel): void
323    {
324        $size = filesize($abs);
325        if ($size === false) $size = 0;
326        $this->fileList[] = [$abs, $archiveRel, $size];
327        $this->totalBytes += $size;
328    }
329
330    /* ----------------------------------------------------------------- *
331     *  Preview
332     * ----------------------------------------------------------------- */
333
334    /**
335     * Render a summary table grouping files by top-level archive section.
336     */
337    protected function renderPreview(): void
338    {
339        echo '<h2>Preview</h2>';
340        echo '<p>' . count($this->fileList) . ' files, '
341            . hsc($this->humanBytes($this->totalBytes)) . ' uncompressed.</p>';
342
343        $perRoot = [];
344        foreach ($this->fileList as [$abs, $rel, $size]) {
345            $parts = explode('/', $rel, 4);
346            $top = isset($parts[1]) ? ($parts[0] . '/' . $parts[1]) : $parts[0];
347            if (!isset($perRoot[$top])) $perRoot[$top] = ['count' => 0, 'bytes' => 0];
348            $perRoot[$top]['count']++;
349            $perRoot[$top]['bytes'] += $size;
350        }
351        ksort($perRoot);
352
353        echo '<table class="inline"><thead><tr><th>Section</th><th style="text-align:right;">Files</th><th style="text-align:right;">Size</th></tr></thead><tbody>';
354        foreach ($perRoot as $section => $stats) {
355            echo '<tr><td><code>' . hsc($section) . '</code></td>'
356                . '<td style="text-align:right;">' . (int)$stats['count'] . '</td>'
357                . '<td style="text-align:right;">' . hsc($this->humanBytes($stats['bytes'])) . '</td></tr>';
358        }
359        echo '</tbody></table>';
360        echo '<p>Click <em>Download tar.gz</em> above to create and download the archive '
361            . '(compressed size will typically be smaller).</p>';
362    }
363
364    /**
365     * Format a byte count as a human-readable string (B, KiB, MiB, GiB, TiB).
366     *
367     * @param int $bytes
368     * @return string
369     */
370    protected function humanBytes(int $bytes): string
371    {
372        $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
373        $i = 0;
374        $n = (float)$bytes;
375        while ($n >= 1024 && $i < count($units) - 1) {
376            $n /= 1024;
377            $i++;
378        }
379        return sprintf($i === 0 ? '%d %s' : '%.2f %s', $n, $units[$i]);
380    }
381
382    /* ----------------------------------------------------------------- *
383     *  Archive creation + streaming
384     * ----------------------------------------------------------------- */
385
386    /**
387     * Build the archive in data/tmp/, stream it to the browser as a tar.gz download,
388     * and exit. Returns without exiting only when an error prevents streaming, so the
389     * caller can fall through to html() and display the form again.
390     */
391    protected function streamArchive(): void
392    {
393        global $conf, $INPUT;
394
395        // Defense-in-depth: AdminPlugin framework should have blocked non-admins
396        // before we got here, but verify directly anyway.
397        if (!auth_isadmin()) {
398            msg('Site Backup: admin access required.', -1);
399            return;
400        }
401
402        if (!$this->fileList) {
403            msg('Site Backup: nothing selected.', -1);
404            return;
405        }
406
407        set_time_limit(0);
408        ignore_user_abort(true);
409        ini_set('memory_limit', '256M');
410
411        $tmpDir = $conf['tmpdir'];
412        if (!is_dir($tmpDir) || !is_writable($tmpDir)) {
413            msg('Site Backup: temp directory is not writable: ' . hsc($tmpDir), -1);
414            return;
415        }
416
417        // Build a hard-to-guess filename. 16 hex chars = 64 bits of entropy from
418        // a CSPRNG. The file also lives under data/.htaccess deny-all so even a
419        // guess wouldn't be enough.
420        $host = $INPUT->server->str('HTTP_HOST', 'wiki');
421        $host = preg_replace('/[^a-zA-Z0-9._-]+/', '_', $host);
422        $stamp = date('Ymd-His');
423        $archiveDir = $host . '-backup-' . $stamp;             // dir inside the tar
424        $downloadName = $archiveDir . '.tar.gz';               // browser filename
425        $tmpFile = $tmpDir . '/' . self::TMP_PREFIX . bin2hex(random_bytes(8)) . '.tar.gz';
426
427        // Guarantee the temp file is deleted even on connection abort, fatal
428        // error, or `exit` from within the streaming loop.
429        register_shutdown_function(function () use ($tmpFile) {
430            if (is_file($tmpFile)) unlink($tmpFile);
431        });
432
433        $oldUmask = umask(0077);
434
435        try {
436            $tar = new Tar();
437            $tar->setCompression(6, Archive::COMPRESS_GZIP);
438            $tar->create($tmpFile);
439
440            // Belt-and-suspenders: explicitly chmod once created, in case the
441            // umask wasn't honored (some filesystems / wrappers ignore it).
442            chmod($tmpFile, 0600);
443
444            foreach ($this->fileList as [$abs, $rel, $size]) {
445                try {
446                    $tar->addFile($abs, $archiveDir . '/' . $rel);
447                } catch (Exception $e) {
448                    // Skip individual broken files rather than failing the whole backup.
449                    continue;
450                }
451            }
452            $tar->close();
453        } catch (ArchiveIOException $e) {
454            umask($oldUmask);
455            if (is_file($tmpFile)) unlink($tmpFile);
456            msg('Site Backup: could not create archive: ' . hsc($e->getMessage()), -1);
457            return;
458        }
459
460        umask($oldUmask);
461
462        if (!is_file($tmpFile) || filesize($tmpFile) === 0) {
463            if (is_file($tmpFile)) unlink($tmpFile);
464            msg('Site Backup: archive was empty or could not be written.', -1);
465            return;
466        }
467
468        $size = filesize($tmpFile);
469
470        // Clear any output buffering DokuWiki / extensions may have started so
471        // headers + binary body go out cleanly.
472        while (ob_get_level() > 0) {
473            ob_end_clean();
474        }
475
476        header('Content-Type: application/gzip');
477        header('Content-Disposition: attachment; filename="' . $downloadName . '"');
478        header('Content-Length: ' . $size);
479        header('Cache-Control: no-store, no-cache, must-revalidate, private');
480        header('Pragma: no-cache');
481        header('X-Content-Type-Options: nosniff');
482
483        $fp = fopen($tmpFile, 'rb');
484        if ($fp) {
485            while (!feof($fp)) {
486                $chunk = fread($fp, 1024 * 256);
487                if ($chunk === false) break;
488                echo $chunk;
489                flush();
490            }
491            fclose($fp);
492        }
493        unlink($tmpFile);
494        exit;
495    }
496
497    /**
498     * Remove leftover temp archives from prior runs that died before unlink.
499     * Anything matching our prefix older than TMP_STALE_AGE is fair game.
500     */
501    protected function sweepStaleTempFiles(): void
502    {
503        global $conf;
504        $tmpDir = $conf['tmpdir'] ?? null;
505        if (!$tmpDir || !is_dir($tmpDir)) return;
506
507        $cutoff = time() - self::TMP_STALE_AGE;
508        $pattern = $tmpDir . '/' . self::TMP_PREFIX . '*';
509        foreach ((array) glob($pattern) as $stale) {
510            if (!is_file($stale)) continue;
511            $mtime = filemtime($stale);
512            if ($mtime !== false && $mtime < $cutoff) {
513                unlink($stale);
514            }
515        }
516    }
517}
518