xref: /plugin/sitebackup/admin.php (revision 723bf90e42cc8fe42f2b649e44ffd6864ce3b9e4)
1<?php
2/**
3 * Site Backup admin plugin for DokuWiki.
4 *
5 * Streams a tar.gz of selected wiki parts (pages, media, conf, lib/plugins, lib/tpl)
6 * to the admin's browser. The archive is built in data/tmp/ with a random filename,
7 * streamed out, and deleted immediately. Nothing persists on the server.
8 *
9 * Security model:
10 *  - Admin-only: DokuWiki's AdminPlugin framework enforces auth_isadmin() before
11 *    handle()/html() are invoked because forAdminOnly() returns true. A second
12 *    explicit check inside streamArchive() guards against any framework bypass.
13 *  - The temp archive lives in $conf['tmpdir'] (data/tmp/), which DokuWiki ships
14 *    with a deny-all .htaccess; it cannot be fetched directly even if the path
15 *    were known.
16 *  - Filename uses 64 bits of CSPRNG randomness, file is chmod'd to 0600, and is
17 *    deleted both at the natural end of streamArchive() and via a shutdown
18 *    function in case the connection is aborted partway.
19 *  - Stale temp files from previous runs (older than 1 hour) are swept on each
20 *    invocation, so even a crash-during-stream leaves nothing for long.
21 *
22 * Treat downloaded archives as credentials: they may include conf/users.auth.php
23 * (password hashes), ACL rules, and any secrets stored in conf/local.php.
24 */
25
26use dokuwiki\Extension\AdminPlugin;
27use dokuwiki\Form\Form;
28use splitbrain\PHPArchive\Archive;
29use splitbrain\PHPArchive\ArchiveIOException;
30
31// PatchedTar fixes splitbrain/php-archive PR #38 (mtime bug) for the version
32// of the library vendored with DokuWiki Librarian.
33require_once __DIR__ . '/PatchedTar.php';
34use dokuwiki\plugin\sitebackup\PatchedTar as Tar;
35
36class admin_plugin_sitebackup extends AdminPlugin
37{
38    /** Prefix used for the temp archive filename in data/tmp/. */
39    const TMP_PREFIX = 'sitebackup_tmp_';
40
41    /** Max age (seconds) of leftover temp files before sweep removes them. */
42    const TMP_STALE_AGE = 3600;
43
44    /** @var array list of [absolute path, archive-relative path, size] of files to include */
45    protected $fileList = [];
46
47    /** @var int total uncompressed size of selected files */
48    protected $totalBytes = 0;
49
50    public function forAdminOnly()
51    {
52        return true;
53    }
54
55    public function getMenuSort()
56    {
57        return 1000;
58    }
59
60    public function getMenuText($language)
61    {
62        return 'Site Backup';
63    }
64
65    /**
66     * Dispatch based on the submitted action.
67     * Valid actions: "preview" (build file list, render summary table),
68     *                "download" (build archive, stream as tar.gz).
69     */
70    public function handle()
71    {
72        global $INPUT;
73
74        // Sweep stale temp files from previous runs every time we enter the page.
75        $this->sweepStaleTempFiles();
76
77        if (!$INPUT->has('sitebackup_action')) return;
78        if (!checkSecurityToken()) return;
79
80        $action = $INPUT->str('sitebackup_action');
81        if ($action !== 'preview' && $action !== 'download') return;
82
83        // Download MUST be POST. Refuse GET / HEAD / etc. so a stray link, browser
84        // prefetch, or curious co-admin pasting a URL can't trigger a backup.
85        if ($action === 'download' && ($_SERVER['REQUEST_METHOD'] ?? 'GET') !== 'POST') {
86            msg('Site Backup: download must be submitted via POST.', -1);
87            return;
88        }
89
90        $this->collectFiles();
91
92        if ($action === 'download') {
93            $this->streamArchive();
94            // streamArchive() exits on success. If it returns, an error was shown
95            // via msg() and we fall through to html() so the user sees the form.
96        }
97    }
98
99    public function html()
100    {
101        echo '<h1>Site Backup</h1>';
102        echo '<p>Select what to include, click <em>Preview</em> to see the file list and total size, '
103            . 'then <em>Download tar.gz</em> to receive the archive in your browser.</p>';
104        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
105            . '<strong>Sensitive content warning.</strong> The archive may contain password hashes '
106            . '(<code>conf/users.auth.php</code>), ACL rules, and any secrets stored in '
107            . '<code>conf/local.php</code> (DB credentials, SMTP passwords, API keys). '
108            . 'Treat the download like a credential.'
109            . '</p>';
110
111        $this->renderForm();
112
113        if ($this->fileList) {
114            $this->renderPreview();
115        }
116    }
117
118    /* ----------------------------------------------------------------- *
119     *  Form
120     * ----------------------------------------------------------------- */
121
122    protected function renderForm()
123    {
124        global $INPUT;
125
126        $hasSubmitted = $INPUT->has('sitebackup_action');
127        $defaults = [
128            'pages'       => true,
129            'media'       => true,
130            'meta'        => true,
131            'media_meta'  => true,
132            'attic'       => false,
133            'media_attic' => false,
134            'index'       => false,
135            'conf'        => true,
136            'plugins'     => true,
137            'tpl'         => true,
138        ];
139        $sel = [];
140        foreach ($defaults as $k => $def) {
141            $sel[$k] = $hasSubmitted ? $INPUT->bool('sb_' . $k, false) : $def;
142        }
143
144        $form = new Form(['method' => 'POST', 'id' => 'sitebackup_form']);
145        $form->setHiddenField('do', 'admin');
146        $form->setHiddenField('page', 'sitebackup');
147
148        $style = 'text-align: left; padding: 0 1em .5em 1em; margin: 1em 0;';
149
150        $form->addFieldsetOpen('Wiki content')->attr('style', $style);
151        $this->addCheckboxRow($form, 'sb_pages',       'Pages (data/pages)',                          $sel['pages']);
152        $this->addCheckboxRow($form, 'sb_media',       'Media files (data/media)',                    $sel['media']);
153        $this->addCheckboxRow($form, 'sb_meta',        'Page metadata (data/meta)',                   $sel['meta']);
154        $this->addCheckboxRow($form, 'sb_media_meta',  'Media metadata (data/media_meta)',            $sel['media_meta']);
155        $this->addCheckboxRow($form, 'sb_attic',       'Page revisions (data/attic) - can be large',  $sel['attic']);
156        $this->addCheckboxRow($form, 'sb_media_attic', 'Media revisions (data/media_attic)',          $sel['media_attic']);
157        $this->addCheckboxRow($form, 'sb_index',       'Search index (data/index) - rebuildable',     $sel['index']);
158        $form->addFieldsetClose();
159
160        $form->addFieldsetOpen('Configuration & code')->attr('style', $style);
161        $this->addCheckboxRow($form, 'sb_conf',    'Configuration (conf/) - includes secrets',  $sel['conf']);
162        $this->addCheckboxRow($form, 'sb_plugins', 'Plugins source (lib/plugins/)',             $sel['plugins']);
163        $this->addCheckboxRow($form, 'sb_tpl',     'Templates source (lib/tpl/)',               $sel['tpl']);
164        $form->addFieldsetClose();
165
166        $form->addTagOpen('p');
167        $form->addButton('sitebackup_action', 'Preview')->val('preview');
168        $form->addHTML(' &nbsp;&nbsp; ');
169        $form->addButton('sitebackup_action', 'Download tar.gz')->val('download');
170        $form->addTagClose('p');
171
172        echo $form->toHTML();
173    }
174
175    protected function addCheckboxRow(Form $form, $name, $label, $checked)
176    {
177        $form->addTagOpen('div')->attr('style', 'margin:.4em 0;');
178        $cb = $form->addCheckbox($name, ' ' . $label);
179        $cb->val('1');
180        if ($checked) $cb->attr('checked', 'checked');
181        $form->addTagClose('div');
182    }
183
184    /* ----------------------------------------------------------------- *
185     *  File collection
186     * ----------------------------------------------------------------- */
187
188    protected function collectFiles()
189    {
190        global $INPUT, $conf;
191
192        // Use $conf[...] for the data dirs so relocated savedir installs still work.
193        $roots = [
194            'sb_pages'       => [$conf['datadir'],        'data/pages'],
195            'sb_media'       => [$conf['mediadir'],       'data/media'],
196            'sb_meta'        => [$conf['metadir'],        'data/meta'],
197            'sb_media_meta'  => [$conf['mediametadir'],   'data/media_meta'],
198            'sb_attic'       => [$conf['olddir'],         'data/attic'],
199            'sb_media_attic' => [$conf['mediaolddir'],    'data/media_attic'],
200            'sb_index'       => [$conf['indexdir'],       'data/index'],
201            'sb_conf'        => [rtrim(DOKU_CONF, '/'),   'conf'],
202            'sb_plugins'     => [rtrim(DOKU_PLUGIN, '/'), 'lib/plugins'],
203            'sb_tpl'         => [DOKU_INC . 'lib/tpl',    'lib/tpl'],
204        ];
205
206        foreach ($roots as $field => $pair) {
207            if (!$INPUT->bool($field, false)) continue;
208            [$srcAbs, $archiveRel] = $pair;
209            $this->walkInto($srcAbs, $archiveRel);
210        }
211    }
212
213    protected function walkInto($srcAbs, $archiveRel)
214    {
215        if (!file_exists($srcAbs)) return;
216
217        if (is_file($srcAbs)) {
218            $this->appendFile($srcAbs, $archiveRel);
219            return;
220        }
221
222        try {
223            $it = new RecursiveIteratorIterator(
224                new RecursiveDirectoryIterator(
225                    $srcAbs,
226                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
227                ),
228                RecursiveIteratorIterator::LEAVES_ONLY
229            );
230        } catch (Exception $e) {
231            return;
232        }
233
234        $srcRoot = rtrim($srcAbs, '/');
235        $rootLen = strlen($srcRoot) + 1;
236        foreach ($it as $info) {
237            try {
238                if (!$info->isFile() || !$info->isReadable()) continue;
239                $abs = $info->getPathname();
240                $rel = str_replace('\\', '/', substr($abs, $rootLen));
241
242                if ($this->isIgnored($archiveRel, $rel)) continue;
243
244                $this->appendFile($abs, $archiveRel . '/' . $rel);
245            } catch (Exception $e) {
246                continue;
247            }
248        }
249    }
250
251    /**
252     * Filename / path-segment ignores. Hardcoded (no config) to keep the plugin small.
253     *
254     * @param string $archiveRel  e.g. "conf" or "lib/plugins" - the top-level branch
255     * @param string $rel         path within that branch
256     */
257    protected function isIgnored($archiveRel, $rel)
258    {
259        $base = basename($rel);
260
261        // Universal noise.
262        if ($base === '_dummy') return true;
263        if ($base === '.DS_Store') return true;
264        if ($base === 'Thumbs.db') return true;
265
266        // Belt-and-suspenders: never include our own scratch files even if
267        // someone pointed savedir at an unusual location.
268        if (strpos($base, self::TMP_PREFIX) === 0) return true;
269
270        // Skip VCS metadata anywhere in any branch. Local clones / checkouts
271        // can be huge and aren't part of "live" state.
272        $segments = explode('/', $rel);
273        foreach ($segments as $seg) {
274            if ($seg === '.git') return true;
275            if ($seg === '.svn') return true;
276            if ($seg === '.hg') return true;
277        }
278
279        // conf/ branch: drop *.dist / *.example / *.bak sample files. They're
280        // shipped with DokuWiki and templates, not real configuration.
281        if ($archiveRel === 'conf') {
282            if (preg_match('/\.(dist|example|bak)$/i', $base)) return true;
283        }
284
285        return false;
286    }
287
288    protected function appendFile($abs, $archiveRel)
289    {
290        $size = @filesize($abs);
291        if ($size === false) $size = 0;
292        $this->fileList[] = [$abs, $archiveRel, $size];
293        $this->totalBytes += $size;
294    }
295
296    /* ----------------------------------------------------------------- *
297     *  Preview
298     * ----------------------------------------------------------------- */
299
300    protected function renderPreview()
301    {
302        echo '<h2>Preview</h2>';
303        echo '<p>' . count($this->fileList) . ' files, '
304            . hsc($this->humanBytes($this->totalBytes)) . ' uncompressed.</p>';
305
306        $perRoot = [];
307        foreach ($this->fileList as [$abs, $rel, $size]) {
308            $parts = explode('/', $rel, 4);
309            $top = isset($parts[1]) ? ($parts[0] . '/' . $parts[1]) : $parts[0];
310            if (!isset($perRoot[$top])) $perRoot[$top] = ['count' => 0, 'bytes' => 0];
311            $perRoot[$top]['count']++;
312            $perRoot[$top]['bytes'] += $size;
313        }
314        ksort($perRoot);
315
316        echo '<table class="inline"><thead><tr><th>Section</th><th style="text-align:right;">Files</th><th style="text-align:right;">Size</th></tr></thead><tbody>';
317        foreach ($perRoot as $section => $stats) {
318            echo '<tr><td><code>' . hsc($section) . '</code></td>'
319                . '<td style="text-align:right;">' . (int)$stats['count'] . '</td>'
320                . '<td style="text-align:right;">' . hsc($this->humanBytes($stats['bytes'])) . '</td></tr>';
321        }
322        echo '</tbody></table>';
323        echo '<p>Click <em>Download tar.gz</em> above to create and download the archive '
324            . '(compressed size will typically be smaller).</p>';
325    }
326
327    protected function humanBytes($bytes)
328    {
329        $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
330        $i = 0;
331        $n = (float)$bytes;
332        while ($n >= 1024 && $i < count($units) - 1) {
333            $n /= 1024;
334            $i++;
335        }
336        return sprintf($i === 0 ? '%d %s' : '%.2f %s', $n, $units[$i]);
337    }
338
339    /* ----------------------------------------------------------------- *
340     *  Archive creation + streaming
341     * ----------------------------------------------------------------- */
342
343    protected function streamArchive()
344    {
345        global $conf;
346
347        // Defense-in-depth: AdminPlugin framework should have blocked non-admins
348        // before we got here, but verify directly anyway.
349        if (!auth_isadmin()) {
350            msg('Site Backup: admin access required.', -1);
351            return;
352        }
353
354        if (!$this->fileList) {
355            msg('Site Backup: nothing selected.', -1);
356            return;
357        }
358
359        @set_time_limit(0);
360        @ignore_user_abort(true);
361        @ini_set('memory_limit', '256M');
362
363        $tmpDir = $conf['tmpdir'];
364        if (!is_dir($tmpDir) || !is_writable($tmpDir)) {
365            msg('Site Backup: temp directory is not writable: ' . hsc($tmpDir), -1);
366            return;
367        }
368
369        // Build a hard-to-guess filename. 16 hex chars = 64 bits of entropy from
370        // a CSPRNG. The file also lives under data/.htaccess deny-all so even a
371        // guess wouldn't be enough.
372        $host = $_SERVER['HTTP_HOST'] ?? 'wiki';
373        $host = preg_replace('/[^a-zA-Z0-9._-]+/', '_', $host);
374        $stamp = date('Ymd-His');
375        $archiveDir = $host . '-backup-' . $stamp;             // dir inside the tar
376        $downloadName = $archiveDir . '.tar.gz';               // browser filename
377        $tmpFile = $tmpDir . '/' . self::TMP_PREFIX . bin2hex(random_bytes(8)) . '.tar.gz';
378
379        // Guarantee the temp file is deleted even on connection abort, fatal
380        // error, or `exit` from within the streaming loop.
381        register_shutdown_function(function () use ($tmpFile) {
382            if (is_file($tmpFile)) @unlink($tmpFile);
383        });
384
385        $oldUmask = @umask(0077);
386
387        try {
388            $tar = new Tar();
389            $tar->setCompression(6, Archive::COMPRESS_GZIP);
390            $tar->create($tmpFile);
391
392            // Belt-and-suspenders: explicitly chmod once created, in case the
393            // umask wasn't honored (some filesystems / wrappers ignore it).
394            @chmod($tmpFile, 0600);
395
396            foreach ($this->fileList as [$abs, $rel, $size]) {
397                try {
398                    $tar->addFile($abs, $archiveDir . '/' . $rel);
399                } catch (Exception $e) {
400                    // Skip individual broken files rather than failing the whole backup.
401                    continue;
402                }
403            }
404            $tar->close();
405        } catch (ArchiveIOException $e) {
406            @umask($oldUmask);
407            @unlink($tmpFile);
408            msg('Site Backup: could not create archive: ' . hsc($e->getMessage()), -1);
409            return;
410        }
411
412        @umask($oldUmask);
413
414        if (!is_file($tmpFile) || filesize($tmpFile) === 0) {
415            @unlink($tmpFile);
416            msg('Site Backup: archive was empty or could not be written.', -1);
417            return;
418        }
419
420        $size = filesize($tmpFile);
421
422        // Clear any output buffering DokuWiki / extensions may have started so
423        // headers + binary body go out cleanly.
424        while (ob_get_level() > 0) {
425            @ob_end_clean();
426        }
427
428        header('Content-Type: application/gzip');
429        header('Content-Disposition: attachment; filename="' . $downloadName . '"');
430        header('Content-Length: ' . $size);
431        header('Cache-Control: no-store, no-cache, must-revalidate, private');
432        header('Pragma: no-cache');
433        header('X-Content-Type-Options: nosniff');
434
435        $fp = fopen($tmpFile, 'rb');
436        if ($fp) {
437            while (!feof($fp)) {
438                $chunk = fread($fp, 1024 * 256);
439                if ($chunk === false) break;
440                echo $chunk;
441                @flush();
442            }
443            fclose($fp);
444        }
445        @unlink($tmpFile);
446        exit;
447    }
448
449    /**
450     * Remove leftover temp archives from prior runs that died before unlink.
451     * Anything matching our prefix older than TMP_STALE_AGE is fair game.
452     */
453    protected function sweepStaleTempFiles()
454    {
455        global $conf;
456        $tmpDir = $conf['tmpdir'] ?? null;
457        if (!$tmpDir || !is_dir($tmpDir)) return;
458
459        $cutoff = time() - self::TMP_STALE_AGE;
460        $pattern = $tmpDir . '/' . self::TMP_PREFIX . '*';
461        foreach ((array) @glob($pattern) as $stale) {
462            if (!is_file($stale)) continue;
463            $mtime = @filemtime($stale);
464            if ($mtime !== false && $mtime < $cutoff) {
465                @unlink($stale);
466            }
467        }
468    }
469}
470