sweepStaleTempFiles();
if (!$INPUT->has('sitebackup_action')) return;
if (!checkSecurityToken()) return;
$action = $INPUT->str('sitebackup_action');
if ($action !== 'preview' && $action !== 'download') return;
// Download MUST be POST. Refuse GET / HEAD / etc. so a stray link, browser
// prefetch, or curious co-admin pasting a URL can't trigger a backup.
if ($action === 'download' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
msg('Site Backup: download must be submitted via POST.', -1);
return;
}
$this->collectFiles();
if ($action === 'download') {
$this->streamArchive();
// streamArchive() exits on success. If it returns, an error was shown
// via msg() and we fall through to html() so the user sees the form.
}
}
/**
 * Output the admin page.
 *
 * Emits the heading, the usage hint and the sensitive-content warning, then the
 * selection form. The preview table follows only when $this->fileList is non-empty.
 */
public function html(): void
{
    $heading = '
Site Backup
';
    $usage = 'Select what to include, click Preview to see the file list and total size, '
        . 'then Download tar.gz to receive the archive in your browser.
';
    $warning = ''
        . 'Sensitive content warning. The archive may contain password hashes '
        . '(conf/users.auth.php), ACL rules, and any secrets stored in '
        . 'conf/local.php (DB credentials, SMTP passwords, API keys). '
        . 'Treat the download like a credential.'
        . '
';
    echo $heading, $usage, $warning;

    $this->renderForm();

    // No collected files means there is nothing to preview.
    if (!$this->fileList) {
        return;
    }
    $this->renderPreview();
}
/* ----------------------------------------------------------------- *
* Form
* ----------------------------------------------------------------- */
/**
 * Output the selection form: one checkbox per backup branch plus the
 * Preview and Download buttons.
 *
 * On a first visit (no sitebackup_action in the request) each checkbox takes
 * its built-in default; after a submit it mirrors what the request carried.
 */
protected function renderForm(): void
{
    global $INPUT;

    // legend => [ key => [label, default state] ], all in display order.
    $groups = [
        'Wiki content' => [
            'pages'       => ['Pages (data/pages)', true],
            'media'       => ['Media files (data/media)', true],
            'meta'        => ['Page metadata (data/meta)', true],
            'media_meta'  => ['Media metadata (data/media_meta)', true],
            'attic'       => ['Page revisions (data/attic) - can be large', false],
            'media_attic' => ['Media revisions (data/media_attic)', false],
            'index'       => ['Search index (data/index) - rebuildable', false],
        ],
        'Configuration & code' => [
            'conf'    => ['Configuration (conf/) - includes secrets', true],
            'plugins' => ['Plugins source (lib/plugins/)', true],
            'tpl'     => ['Templates source (lib/tpl/)', true],
        ],
    ];

    // Resolve the checked state of every box before building the form.
    $submitted = $INPUT->has('sitebackup_action');
    $checked = [];
    foreach ($groups as $rows) {
        foreach ($rows as $key => $row) {
            $checked[$key] = $submitted ? $INPUT->bool('sb_' . $key, false) : $row[1];
        }
    }

    $form = new Form(['method' => 'POST', 'id' => 'sitebackup_form']);
    $form->setHiddenField('do', 'admin');
    $form->setHiddenField('page', 'sitebackup');

    $fieldsetStyle = 'text-align: left; padding: 0 1em .5em 1em; margin: 1em 0;';
    foreach ($groups as $legend => $rows) {
        $form->addFieldsetOpen($legend)->attr('style', $fieldsetStyle);
        foreach ($rows as $key => $row) {
            $this->addCheckboxRow($form, 'sb_' . $key, $row[0], $checked[$key]);
        }
        $form->addFieldsetClose();
    }

    // Both buttons share one field name; the submitted value picks the action.
    $form->addTagOpen('p');
    $form->addButton('sitebackup_action', 'Preview')->val('preview');
    $form->addHTML(' ');
    $form->addButton('sitebackup_action', 'Download tar.gz')->val('download');
    $form->addTagClose('p');

    echo $form->toHTML();
}
/**
 * Append one checkbox, wrapped in its own styled <div>, to the form.
 *
 * @param Form   $form    form being built
 * @param string $name    field name of the checkbox
 * @param string $label   text shown next to the box (a leading space is added)
 * @param bool   $checked true to render the box pre-checked
 */
protected function addCheckboxRow(Form $form, string $name, string $label, bool $checked): void
{
    $wrapper = $form->addTagOpen('div');
    $wrapper->attr('style', 'margin:.4em 0;');

    $box = $form->addCheckbox($name, ' ' . $label);
    $box->val('1');
    if ($checked) {
        $box->attr('checked', 'checked');
    }

    $form->addTagClose('div');
}
/* ----------------------------------------------------------------- *
* File collection
* ----------------------------------------------------------------- */
/**
 * Fill $this->fileList with the files of every branch ticked in the current request.
 *
 * Each branch maps its form field to a pair of
 * [absolute source directory, path prefix inside the archive].
 */
protected function collectFiles(): void
{
    global $INPUT, $conf;

    // Data directories come from $conf so relocated savedir installs still work.
    $branches = [
        'sb_pages'       => [$conf['datadir'],            'data/pages'],
        'sb_media'       => [$conf['mediadir'],           'data/media'],
        'sb_meta'        => [$conf['metadir'],            'data/meta'],
        'sb_media_meta'  => [$conf['mediametadir'],       'data/media_meta'],
        'sb_attic'       => [$conf['olddir'],             'data/attic'],
        'sb_media_attic' => [$conf['mediaolddir'],        'data/media_attic'],
        'sb_index'       => [$conf['indexdir'],           'data/index'],
        'sb_conf'        => [rtrim(DOKU_CONF, '/'),       'conf'],
        'sb_plugins'     => [rtrim(DOKU_PLUGIN, '/'),     'lib/plugins'],
        'sb_tpl'         => [DOKU_INC . 'lib/tpl',        'lib/tpl'],
    ];

    // Keep only the branches whose checkbox was submitted as true.
    $wanted = array_filter(
        $branches,
        static function (string $field) use ($INPUT): bool {
            return $INPUT->bool($field, false);
        },
        ARRAY_FILTER_USE_KEY
    );

    foreach ($wanted as $location) {
        $this->walkInto($location[0], $location[1]);
    }
}
/**
 * Recursively enumerate all readable files under $srcAbs and append them to $this->fileList.
 *
 * A missing path is ignored. A path that is a plain file is appended as-is under
 * $archiveRel. For a directory, every readable regular file that isIgnored() does
 * not reject is appended as "$archiveRel/<path relative to $srcAbs>".
 *
 * Unreadable entries are skipped rather than failing the whole walk: unreadable
 * files are filtered in the loop, and unreadable sub-directories are skipped by
 * the iterator itself (CATCH_GET_CHILD).
 *
 * @param string $srcAbs     absolute filesystem path (file or directory)
 * @param string $archiveRel path prefix to use inside the archive
 */
protected function walkInto(string $srcAbs, string $archiveRel): void
{
    if (!file_exists($srcAbs)) return;
    if (is_file($srcAbs)) {
        $this->appendFile($srcAbs, $archiveRel);
        return;
    }

    try {
        $it = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator(
                $srcAbs,
                FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
            ),
            RecursiveIteratorIterator::LEAVES_ONLY,
            // Without this flag, descending into an unreadable sub-directory
            // throws from the foreach itself (outside the per-entry try below)
            // and aborts the whole request.
            RecursiveIteratorIterator::CATCH_GET_CHILD
        );
    } catch (Exception $e) {
        // The root directory itself could not be opened.
        return;
    }

    // Length of "<root>/" so substr() yields the path relative to the root.
    $srcRoot = rtrim($srcAbs, '/');
    $rootLen = strlen($srcRoot) + 1;

    foreach ($it as $info) {
        try {
            if (!$info->isFile() || !$info->isReadable()) continue;
            $abs = $info->getPathname();
            // Normalise any remaining backslashes so archive paths always use "/".
            $rel = str_replace('\\', '/', substr($abs, $rootLen));
            if ($this->isIgnored($archiveRel, $rel)) continue;
            $this->appendFile($abs, $archiveRel . '/' . $rel);
        } catch (Exception $e) {
            // Best effort: a single broken entry must not stop the enumeration.
            continue;
        }
    }
}
/**
 * Decide whether a file is left out of the archive.
 * The rules are hardcoded (no config) to keep the plugin small.
 *
 * @param string $archiveRel top-level archive branch, e.g. "conf" or "lib/plugins"
 * @param string $rel        path within that branch
 * @return bool true when the file must be skipped
 */
protected function isIgnored(string $archiveRel, string $rel): bool
{
    $name = basename($rel);

    // Universal noise files.
    if (in_array($name, ['_dummy', '.DS_Store', 'Thumbs.db'], true)) {
        return true;
    }

    // Never pick up our own scratch archives, wherever the temp dir lives.
    if (str_starts_with($name, self::TMP_PREFIX)) {
        return true;
    }

    // VCS metadata at any depth: potentially huge and not part of "live" state.
    if (array_intersect(['.git', '.svn', '.hg'], explode('/', $rel))) {
        return true;
    }

    // In the conf/ branch, *.dist / *.example / *.bak are samples, not real configuration.
    return $archiveRel === 'conf'
        && preg_match('/\.(dist|example|bak)$/i', $name) === 1;
}
/**
 * Record one file in $this->fileList and add its size to $this->totalBytes.
 *
 * A size that cannot be determined is counted as 0 bytes.
 *
 * @param string $abs        absolute filesystem path
 * @param string $archiveRel path inside the archive
 */
protected function appendFile(string $abs, string $archiveRel): void
{
    // filesize() yields false on failure; fold that into 0.
    $bytes = filesize($abs) ?: 0;

    $entry = [$abs, $archiveRel, $bytes];
    $this->fileList[] = $entry;
    $this->totalBytes = $this->totalBytes + $bytes;
}
/* ----------------------------------------------------------------- *
* Preview
* ----------------------------------------------------------------- */
/**
 * Render a summary table grouping files by top-level archive section.
 *
 * Prints the overall file count and uncompressed size, then one row per section
 * (sorted by section name) with that section's file count and size.
 *
 * Sections are the archive branches: two path segments ("data/pages",
 * "lib/plugins", ...) except for "conf", which is a single-segment branch.
 */
protected function renderPreview(): void
{
    echo 'Preview
';
    echo '' . count($this->fileList) . ' files, '
        . hsc($this->humanBytes($this->totalBytes)) . ' uncompressed.
';

    $perRoot = [];
    foreach ($this->fileList as [, $rel, $size]) {
        // Only the first two segments matter; the remainder stays in $parts[2].
        $parts = explode('/', $rel, 3);

        // "conf" files sit directly below a one-segment branch. Taking two
        // segments there would create a separate section per config file
        // ("conf/local.php", "conf/acl.auth.php", ...).
        if ($parts[0] === 'conf' || !isset($parts[1])) {
            $top = $parts[0];
        } else {
            $top = $parts[0] . '/' . $parts[1];
        }

        if (!isset($perRoot[$top])) $perRoot[$top] = ['count' => 0, 'bytes' => 0];
        $perRoot[$top]['count']++;
        $perRoot[$top]['bytes'] += $size;
    }
    ksort($perRoot);

    echo '| Section | Files | Size |
';
    foreach ($perRoot as $section => $stats) {
        echo '' . hsc($section) . ' | '
            . '' . (int)$stats['count'] . ' | '
            . '' . hsc($this->humanBytes($stats['bytes'])) . ' |
';
    }
    echo '
';
    echo 'Click Download tar.gz above to create and download the archive '
        . '(compressed size will typically be smaller).
';
}
/**
 * Turn a byte count into a short human-readable string.
 *
 * Values below 1024 are printed as whole bytes ("512 B"); larger values are
 * scaled by 1024 per step and printed with two decimals ("1.50 KiB").
 * TiB is the largest unit, so bigger values stay expressed in TiB.
 *
 * @param int $bytes
 * @return string
 */
protected function humanBytes(int $bytes): string
{
    $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
    $lastUnit = count($units) - 1;

    $value = (float)$bytes;
    for ($step = 0; $step < $lastUnit && $value >= 1024; $step++) {
        $value /= 1024;
    }

    // Plain bytes are shown as an integer, scaled values with two decimals.
    $format = $step === 0 ? '%d %s' : '%.2f %s';
    return sprintf($format, $value, $units[$step]);
}
/* ----------------------------------------------------------------- *
* Archive creation + streaming
* ----------------------------------------------------------------- */
/**
 * Build the archive in the temp directory, stream it to the browser as a tar.gz
 * download, and exit.
 *
 * Returns without exiting only when an error prevents streaming (caller is not an
 * admin, nothing is selected, the temp dir is unusable, or the archive could not
 * be created or opened). Each of those cases is reported via msg(), so the caller
 * can fall through to html() and display the form again.
 *
 * The temp archive is removed on every path: explicitly on the error and success
 * paths, and by a shutdown function if the request dies in between.
 */
protected function streamArchive(): void
{
    global $conf, $INPUT;

    // Defense-in-depth: AdminPlugin framework should have blocked non-admins
    // before we got here, but verify directly anyway.
    if (!auth_isadmin()) {
        msg('Site Backup: admin access required.', -1);
        return;
    }
    if (!$this->fileList) {
        msg('Site Backup: nothing selected.', -1);
        return;
    }

    set_time_limit(0);
    ignore_user_abort(true);

    // Raise the memory limit to 256M if it is lower, but never reduce a limit
    // that is already higher or unlimited (-1). The ini value may carry a
    // K/M/G shorthand suffix; the (int) cast reads the leading number.
    $limit = trim((string) ini_get('memory_limit'));
    $multipliers = ['k' => 1024, 'm' => 1024 * 1024, 'g' => 1024 * 1024 * 1024];
    $suffix = $limit === '' ? '' : strtolower(substr($limit, -1));
    $limitBytes = (int) $limit * ($multipliers[$suffix] ?? 1);
    if ($limitBytes >= 0 && $limitBytes < 256 * 1024 * 1024) {
        ini_set('memory_limit', '256M');
    }

    $tmpDir = $conf['tmpdir'];
    if (!is_dir($tmpDir) || !is_writable($tmpDir)) {
        msg('Site Backup: temp directory is not writable: ' . hsc($tmpDir), -1);
        return;
    }

    // Build a hard-to-guess filename. 16 hex chars = 64 bits of entropy from
    // a CSPRNG. The file also lives under data/.htaccess deny-all so even a
    // guess wouldn't be enough.
    $host = $INPUT->server->str('HTTP_HOST', 'wiki');
    // Reduce the host to filename-safe characters; it ends up in the
    // Content-Disposition header and as the top-level directory in the tar.
    $host = preg_replace('/[^a-zA-Z0-9._-]+/', '_', $host);
    $stamp = date('Ymd-His');
    $archiveDir = $host . '-backup-' . $stamp;   // dir inside the tar
    $downloadName = $archiveDir . '.tar.gz';     // browser filename
    $tmpFile = $tmpDir . '/' . self::TMP_PREFIX . bin2hex(random_bytes(8)) . '.tar.gz';

    // Guarantee the temp file is deleted even on connection abort, fatal
    // error, or `exit` from within the streaming loop.
    register_shutdown_function(function () use ($tmpFile) {
        if (is_file($tmpFile)) unlink($tmpFile);
    });

    // Create the archive readable by the owner only. The previous umask is
    // restored in `finally` so it is put back whatever exception escapes.
    $createError = null;
    $oldUmask = umask(0077);
    try {
        $tar = new Tar();
        $tar->setCompression(6, Archive::COMPRESS_GZIP);
        $tar->create($tmpFile);
        // Belt-and-suspenders: explicitly chmod once created, in case the
        // umask wasn't honored (some filesystems / wrappers ignore it).
        chmod($tmpFile, 0600);
        foreach ($this->fileList as [$abs, $rel]) {
            try {
                $tar->addFile($abs, $archiveDir . '/' . $rel);
            } catch (Exception $e) {
                // Skip individual broken files rather than failing the whole backup.
                continue;
            }
        }
        $tar->close();
    } catch (ArchiveIOException $e) {
        $createError = $e->getMessage();
    } finally {
        umask($oldUmask);
    }

    if ($createError !== null) {
        if (is_file($tmpFile)) unlink($tmpFile);
        msg('Site Backup: could not create archive: ' . hsc($createError), -1);
        return;
    }

    // Drop any cached stat data so the size reflects the finished archive.
    clearstatcache(true, $tmpFile);
    if (!is_file($tmpFile) || filesize($tmpFile) === 0) {
        if (is_file($tmpFile)) unlink($tmpFile);
        msg('Site Backup: archive was empty or could not be written.', -1);
        return;
    }
    $size = filesize($tmpFile);

    // Open the archive BEFORE any header goes out: once headers are sent we
    // are committed to a download and can no longer show an error page.
    $fp = fopen($tmpFile, 'rb');
    if ($fp === false) {
        unlink($tmpFile);
        msg('Site Backup: could not open archive for streaming.', -1);
        return;
    }

    // Clear any output buffering DokuWiki / extensions may have started so
    // headers + binary body go out cleanly. Stop if a buffer refuses to be
    // removed, otherwise this loop would never terminate.
    while (ob_get_level() > 0) {
        if (!ob_end_clean()) break;
    }

    header('Content-Type: application/gzip');
    header('Content-Disposition: attachment; filename="' . $downloadName . '"');
    header('Content-Length: ' . $size);
    header('Cache-Control: no-store, no-cache, must-revalidate, private');
    header('Pragma: no-cache');
    header('X-Content-Type-Options: nosniff');

    // Stream in 256 KiB chunks to keep memory use flat.
    while (!feof($fp)) {
        $chunk = fread($fp, 1024 * 256);
        if ($chunk === false) break;
        echo $chunk;
        flush();
        // ignore_user_abort(true) keeps the script alive after a disconnect
        // (so cleanup runs); there is no point in pushing more data, though.
        if (connection_aborted()) break;
    }
    fclose($fp);

    unlink($tmpFile);
    exit;
}
/**
 * Remove leftover temp archives from prior runs that died before unlink.
 *
 * Every regular file in $conf['tmpdir'] whose name starts with TMP_PREFIX and
 * whose modification time is more than TMP_STALE_AGE seconds in the past is
 * deleted. Does nothing when the temp dir is unset, missing, or cannot be listed.
 */
protected function sweepStaleTempFiles(): void
{
    global $conf;

    $tmpDir = $conf['tmpdir'] ?? null;
    if (!$tmpDir || !is_dir($tmpDir)) return;

    $cutoff = time() - self::TMP_STALE_AGE;
    $pattern = $tmpDir . '/' . self::TMP_PREFIX . '*';

    // glob() returns false on error. Casting that to an array would yield
    // [false] and feed a boolean into is_file(), so bail out explicitly.
    $candidates = glob($pattern);
    if ($candidates === false) return;

    foreach ($candidates as $stale) {
        if (!is_file($stale)) continue;
        $mtime = filemtime($stale);
        if ($mtime !== false && $mtime < $cutoff) {
            unlink($stale);
        }
    }
}
}