<?php
/**
 * Site Backup admin plugin for DokuWiki.
 *
 * Streams a tar.gz of selected wiki parts (pages, media, conf, lib/plugins, lib/tpl)
 * to the admin's browser. The archive is built in data/tmp/ with a random filename,
 * streamed out, and deleted immediately. Nothing persists on the server.
 *
 * Security model:
 *  - Admin-only: DokuWiki's AdminPlugin framework enforces auth_isadmin() before
 *    handle()/html() are invoked because forAdminOnly() returns true. A second
 *    explicit check inside streamArchive() guards against any framework bypass.
 *  - The temp archive lives in $conf['tmpdir'] (data/tmp/), which DokuWiki ships
 *    with a deny-all .htaccess; it cannot be fetched directly even if the path
 *    were known.
 *  - Filename uses 64 bits of CSPRNG randomness, file is chmod'd to 0600, and is
 *    deleted both at the natural end of streamArchive() and via a shutdown
 *    function in case the connection is aborted partway.
 *  - Stale temp files from previous runs (older than 1 hour) are swept on each
 *    invocation, so even a crash-during-stream leaves nothing for long.
 *
 * Treat downloaded archives as credentials: they may include conf/users.auth.php
 * (password hashes), ACL rules, and any secrets stored in conf/local.php.
 */

use dokuwiki\Extension\AdminPlugin;
use dokuwiki\Form\Form;
use splitbrain\PHPArchive\Archive;
use splitbrain\PHPArchive\ArchiveIOException;

// PatchedTar fixes splitbrain/php-archive PR #38 (mtime bug) for the version
// of the library vendored with DokuWiki Librarian.
require_once __DIR__ . '/PatchedTar.php';
use dokuwiki\plugin\sitebackup\PatchedTar as Tar;

class admin_plugin_sitebackup extends AdminPlugin
{
    /** Prefix used for the temp archive filename in data/tmp/. */
    const TMP_PREFIX = 'sitebackup_tmp_';

    /** Max age (seconds) of leftover temp files before sweep removes them. */
    const TMP_STALE_AGE = 3600;

    /** @var array list of [absolute path, archive-relative path, size] of files to include */
    protected $fileList = [];

    /** @var int total uncompressed size of selected files */
    protected $totalBytes = 0;

    /**
     * Restrict the plugin to admins.
     *
     * @return bool always true
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Sort key for the admin menu entry.
     *
     * @return int
     */
    public function getMenuSort()
    {
        return 1000;
    }

    /**
     * Label for the admin menu entry. The label is hardcoded; $language is unused.
     *
     * @param string $language
     * @return string
     */
    public function getMenuText($language)
    {
        return 'Site Backup';
    }

    /**
     * Dispatch based on the submitted action.
     * Valid actions: "preview" (build file list, render summary table),
     *                "download" (build archive, stream as tar.gz).
     *
     * Does nothing when no action was submitted, when the security token check
     * fails, or when the action is not one of the two valid values.
     */
    public function handle()
    {
        global $INPUT;

        // Sweep stale temp files from previous runs every time we enter the page.
        $this->sweepStaleTempFiles();

        if (!$INPUT->has('sitebackup_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('sitebackup_action');
        if ($action !== 'preview' && $action !== 'download') return;

        // Download MUST be POST. Refuse GET / HEAD / etc. so a stray link, browser
        // prefetch, or curious co-admin pasting a URL can't trigger a backup.
        if ($action === 'download' && ($_SERVER['REQUEST_METHOD'] ?? 'GET') !== 'POST') {
            msg('Site Backup: download must be submitted via POST.', -1);
            return;
        }

        $this->collectFiles();

        if ($action === 'download') {
            $this->streamArchive();
            // streamArchive() exits on success. If it returns, an error was shown
            // via msg() and we fall through to html() so the user sees the form.
        }
    }

    /**
     * Render the page: intro text, sensitivity warning, the selection form and,
     * when handle() collected any files, the preview summary.
     */
    public function html()
    {
        echo '<h1>Site Backup</h1>';
        echo '<p>Select what to include, click <em>Preview</em> to see the file list and total size, '
            . 'then <em>Download tar.gz</em> to receive the archive in your browser.</p>';
        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
            . '<strong>Sensitive content warning.</strong> The archive may contain password hashes '
            . '(<code>conf/users.auth.php</code>), ACL rules, and any secrets stored in '
            . '<code>conf/local.php</code> (DB credentials, SMTP passwords, API keys). '
            . 'Treat the download like a credential.'
            . '</p>';

        $this->renderForm();

        if ($this->fileList) {
            $this->renderPreview();
        }
    }

    /* ----------------------------------------------------------------- *
     * Form
     * ----------------------------------------------------------------- */

    /**
     * Print the selection form (one checkbox per backup branch plus the
     * Preview / Download buttons).
     *
     * On first visit the built-in defaults are used; once the form has been
     * submitted the checkboxes mirror the submitted values instead.
     */
    protected function renderForm()
    {
        global $INPUT;

        $hasSubmitted = $INPUT->has('sitebackup_action');
        $defaults = [
            'pages' => true,
            'media' => true,
            'meta' => true,
            'media_meta' => true,
            'attic' => false,
            'media_attic' => false,
            'index' => false,
            'conf' => true,
            'plugins' => true,
            'tpl' => true,
        ];
        $sel = [];
        foreach ($defaults as $k => $def) {
            // An unchecked checkbox is simply absent from the request, so after a
            // submit "missing" must mean false rather than "use the default".
            $sel[$k] = $hasSubmitted ? $INPUT->bool('sb_' . $k, false) : $def;
        }

        $form = new Form(['method' => 'POST', 'id' => 'sitebackup_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'sitebackup');

        $style = 'text-align: left; padding: 0 1em .5em 1em; margin: 1em 0;';

        $form->addFieldsetOpen('Wiki content')->attr('style', $style);
        $this->addCheckboxRow($form, 'sb_pages', 'Pages (data/pages)', $sel['pages']);
        $this->addCheckboxRow($form, 'sb_media', 'Media files (data/media)', $sel['media']);
        $this->addCheckboxRow($form, 'sb_meta', 'Page metadata (data/meta)', $sel['meta']);
        $this->addCheckboxRow($form, 'sb_media_meta', 'Media metadata (data/media_meta)', $sel['media_meta']);
        $this->addCheckboxRow($form, 'sb_attic', 'Page revisions (data/attic) - can be large', $sel['attic']);
        $this->addCheckboxRow($form, 'sb_media_attic', 'Media revisions (data/media_attic)', $sel['media_attic']);
        $this->addCheckboxRow($form, 'sb_index', 'Search index (data/index) - rebuildable', $sel['index']);
        $form->addFieldsetClose();

        $form->addFieldsetOpen('Configuration & code')->attr('style', $style);
        $this->addCheckboxRow($form, 'sb_conf', 'Configuration (conf/) - includes secrets', $sel['conf']);
        $this->addCheckboxRow($form, 'sb_plugins', 'Plugins source (lib/plugins/)', $sel['plugins']);
        $this->addCheckboxRow($form, 'sb_tpl', 'Templates source (lib/tpl/)', $sel['tpl']);
        $form->addFieldsetClose();

        $form->addTagOpen('p');
        $form->addButton('sitebackup_action', 'Preview')->val('preview');
        $form->addHTML(' ');
        $form->addButton('sitebackup_action', 'Download tar.gz')->val('download');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /**
     * Append one checkbox, wrapped in its own <div>, to the form.
     *
     * @param Form   $form    form being built
     * @param string $name    request field name of the checkbox
     * @param string $label   visible label
     * @param bool   $checked whether the box is rendered checked
     */
    protected function addCheckboxRow(Form $form, $name, $label, $checked)
    {
        $form->addTagOpen('div')->attr('style', 'margin:.4em 0;');
        $cb = $form->addCheckbox($name, ' ' . $label);
        $cb->val('1');
        if ($checked) $cb->attr('checked', 'checked');
        $form->addTagClose('div');
    }

    /* ----------------------------------------------------------------- *
     * File collection
     * ----------------------------------------------------------------- */

    /**
     * Fill $fileList / $totalBytes from every branch whose checkbox was submitted.
     */
    protected function collectFiles()
    {
        global $INPUT, $conf;

        // Use $conf[...] for the data dirs so relocated savedir installs still work.
        // Each entry maps a form field to [source path on disk, path inside the archive].
        $roots = [
            'sb_pages' => [$conf['datadir'], 'data/pages'],
            'sb_media' => [$conf['mediadir'], 'data/media'],
            'sb_meta' => [$conf['metadir'], 'data/meta'],
            'sb_media_meta' => [$conf['mediametadir'], 'data/media_meta'],
            'sb_attic' => [$conf['olddir'], 'data/attic'],
            'sb_media_attic' => [$conf['mediaolddir'], 'data/media_attic'],
            'sb_index' => [$conf['indexdir'], 'data/index'],
            'sb_conf' => [rtrim(DOKU_CONF, '/'), 'conf'],
            'sb_plugins' => [rtrim(DOKU_PLUGIN, '/'), 'lib/plugins'],
            'sb_tpl' => [DOKU_INC . 'lib/tpl', 'lib/tpl'],
        ];

        foreach ($roots as $field => $pair) {
            if (!$INPUT->bool($field, false)) continue;
            [$srcAbs, $archiveRel] = $pair;
            $this->walkInto($srcAbs, $archiveRel);
        }
    }

    /**
     * Recursively register every readable regular file below $srcAbs.
     *
     * A missing source is silently skipped; a source that is itself a file is
     * registered directly under $archiveRel. Files matched by isIgnored() are
     * left out.
     *
     * @param string $srcAbs     absolute source path (directory or file)
     * @param string $archiveRel archive-relative path the source maps to
     */
    protected function walkInto($srcAbs, $archiveRel)
    {
        if (!file_exists($srcAbs)) return;

        if (is_file($srcAbs)) {
            $this->appendFile($srcAbs, $archiveRel);
            return;
        }

        try {
            // CATCH_GET_CHILD: an unreadable subdirectory makes getChildren() throw
            // from inside the foreach machinery, i.e. outside the per-item try
            // below. Without this flag one bad directory would abort the request.
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $srcAbs,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::LEAVES_ONLY,
                RecursiveIteratorIterator::CATCH_GET_CHILD
            );
        } catch (Exception $e) {
            return;
        }

        $srcRoot = rtrim($srcAbs, '/');
        $rootLen = strlen($srcRoot) + 1; // +1 skips the separator after the root
        foreach ($it as $info) {
            try {
                if (!$info->isFile() || !$info->isReadable()) continue;
                $abs = $info->getPathname();
                $rel = str_replace('\\', '/', substr($abs, $rootLen));

                if ($this->isIgnored($archiveRel, $rel)) continue;

                $this->appendFile($abs, $archiveRel . '/' . $rel);
            } catch (Exception $e) {
                // Best effort: skip entries that cannot be inspected.
                continue;
            }
        }
    }

    /**
     * Filename / path-segment ignores. Hardcoded (no config) to keep the plugin small.
     *
     * @param string $archiveRel e.g. "conf" or "lib/plugins" - the top-level branch
     * @param string $rel        path within that branch
     * @return bool true when the file must be left out of the archive
     */
    protected function isIgnored($archiveRel, $rel)
    {
        $base = basename($rel);

        // Universal noise.
        if (in_array($base, ['_dummy', '.DS_Store', 'Thumbs.db'], true)) return true;

        // Belt-and-suspenders: never include our own scratch files even if
        // someone pointed savedir at an unusual location.
        if (strpos($base, self::TMP_PREFIX) === 0) return true;

        // Skip VCS metadata anywhere in any branch. Local clones / checkouts
        // can be huge and aren't part of "live" state.
        foreach (explode('/', $rel) as $seg) {
            if (in_array($seg, ['.git', '.svn', '.hg'], true)) return true;
        }

        // conf/ branch: drop *.dist / *.example / *.bak sample files. They're
        // shipped with DokuWiki and templates, not real configuration.
        if ($archiveRel === 'conf' && preg_match('/\.(dist|example|bak)$/i', $base)) {
            return true;
        }

        return false;
    }

    /**
     * Record one file in $fileList and add its size to $totalBytes.
     * A size that cannot be determined is counted as 0.
     *
     * @param string $abs        absolute path on disk
     * @param string $archiveRel path of the file inside the archive
     */
    protected function appendFile($abs, $archiveRel)
    {
        $size = @filesize($abs);
        if ($size === false) $size = 0;
        $this->fileList[] = [$abs, $archiveRel, $size];
        $this->totalBytes += $size;
    }

    /* ----------------------------------------------------------------- *
     * Preview
     * ----------------------------------------------------------------- */

    /**
     * Print the overall file count / size and a per-section summary table.
     */
    protected function renderPreview()
    {
        echo '<h2>Preview</h2>';
        echo '<p>' . count($this->fileList) . ' files, '
            . hsc($this->humanBytes($this->totalBytes)) . ' uncompressed.</p>';

        $perRoot = [];
        foreach ($this->fileList as [$abs, $rel, $size]) {
            $top = $this->sectionOf($rel);
            if (!isset($perRoot[$top])) $perRoot[$top] = ['count' => 0, 'bytes' => 0];
            $perRoot[$top]['count']++;
            $perRoot[$top]['bytes'] += $size;
        }
        ksort($perRoot);

        echo '<table class="inline"><thead><tr><th>Section</th><th style="text-align:right;">Files</th><th style="text-align:right;">Size</th></tr></thead><tbody>';
        foreach ($perRoot as $section => $stats) {
            echo '<tr><td><code>' . hsc($section) . '</code></td>'
                . '<td style="text-align:right;">' . (int)$stats['count'] . '</td>'
                . '<td style="text-align:right;">' . hsc($this->humanBytes($stats['bytes'])) . '</td></tr>';
        }
        echo '</tbody></table>';
        echo '<p>Click <em>Download tar.gz</em> above to create and download the archive '
            . '(compressed size will typically be smaller).</p>';
    }

    /**
     * Map an archive-relative file path to the branch it is summarised under.
     *
     * Branches under "data" and "lib" are two segments deep ("data/pages",
     * "lib/plugins"); every other branch (currently only "conf") is a single
     * segment, so "conf/local.php" is reported under "conf" rather than as a
     * section of its own.
     *
     * @param string $rel archive-relative path as stored in $fileList
     * @return string section label
     */
    protected function sectionOf($rel)
    {
        $parts = explode('/', $rel, 3);
        $twoLevel = ($parts[0] === 'data' || $parts[0] === 'lib');
        if ($twoLevel && isset($parts[1])) {
            return $parts[0] . '/' . $parts[1];
        }
        return $parts[0];
    }

    /**
     * Format a byte count with binary units (B, KiB, MiB, GiB, TiB).
     * Plain bytes are printed as an integer, larger units with two decimals.
     *
     * @param int|float $bytes
     * @return string e.g. "512 B" or "1.50 KiB"
     */
    protected function humanBytes($bytes)
    {
        $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
        $i = 0;
        $n = (float)$bytes;
        while ($n >= 1024 && $i < count($units) - 1) {
            $n /= 1024;
            $i++;
        }
        return sprintf($i === 0 ? '%d %s' : '%.2f %s', $n, $units[$i]);
    }

    /* ----------------------------------------------------------------- *
     * Archive creation + streaming
     * ----------------------------------------------------------------- */

    /**
     * Build the tar.gz from $fileList in the temp dir, stream it to the client
     * and delete it.
     *
     * On success the script terminates via exit. On any failure a message is
     * queued with msg() and the method returns so the normal page is rendered.
     */
    protected function streamArchive()
    {
        global $conf;

        // Defense-in-depth: AdminPlugin framework should have blocked non-admins
        // before we got here, but verify directly anyway.
        if (!auth_isadmin()) {
            msg('Site Backup: admin access required.', -1);
            return;
        }

        if (!$this->fileList) {
            msg('Site Backup: nothing selected.', -1);
            return;
        }

        @set_time_limit(0);
        @ignore_user_abort(true);
        @ini_set('memory_limit', '256M');

        $tmpDir = $conf['tmpdir'];
        if (!is_dir($tmpDir) || !is_writable($tmpDir)) {
            msg('Site Backup: temp directory is not writable: ' . hsc($tmpDir), -1);
            return;
        }

        // Build a hard-to-guess filename. 16 hex chars = 64 bits of entropy from
        // a CSPRNG. The file also lives under data/.htaccess deny-all so even a
        // guess wouldn't be enough.
        $host = $_SERVER['HTTP_HOST'] ?? 'wiki';
        $host = preg_replace('/[^a-zA-Z0-9._-]+/', '_', $host);
        $stamp = date('Ymd-His');
        $archiveDir = $host . '-backup-' . $stamp;   // dir inside the tar
        $downloadName = $archiveDir . '.tar.gz';     // browser filename
        $tmpFile = $tmpDir . '/' . self::TMP_PREFIX . bin2hex(random_bytes(8)) . '.tar.gz';

        // Guarantee the temp file is deleted even on connection abort, fatal
        // error, or `exit` from within the streaming loop.
        register_shutdown_function(function () use ($tmpFile) {
            if (is_file($tmpFile)) @unlink($tmpFile);
        });

        $oldUmask = @umask(0077);

        try {
            $tar = new Tar();
            $tar->setCompression(6, Archive::COMPRESS_GZIP);
            $tar->create($tmpFile);

            // Belt-and-suspenders: explicitly chmod once created, in case the
            // umask wasn't honored (some filesystems / wrappers ignore it).
            @chmod($tmpFile, 0600);

            foreach ($this->fileList as [$abs, $rel, $size]) {
                try {
                    $tar->addFile($abs, $archiveDir . '/' . $rel);
                } catch (Exception $e) {
                    // Skip individual broken files rather than failing the whole backup.
                    continue;
                }
            }
            $tar->close();
        } catch (Exception $e) {
            // Catch everything, not just ArchiveIOException: e.g. setCompression()
            // can reject gzip when zlib is unavailable, and that must end up as a
            // message on the page rather than an uncaught exception.
            @unlink($tmpFile);
            msg('Site Backup: could not create archive: ' . hsc($e->getMessage()), -1);
            return;
        } finally {
            // Always restore the process umask, whatever happened above.
            @umask($oldUmask);
        }

        $size = is_file($tmpFile) ? filesize($tmpFile) : false;
        if ($size === false || $size === 0) {
            @unlink($tmpFile);
            msg('Site Backup: archive was empty or could not be written.', -1);
            return;
        }

        // Open the archive BEFORE any header is sent, so a failure can still be
        // reported on the normal page instead of as an empty download.
        $fp = @fopen($tmpFile, 'rb');
        if (!$fp) {
            @unlink($tmpFile);
            msg('Site Backup: archive could not be opened for download.', -1);
            return;
        }

        // Clear any output buffering DokuWiki / extensions may have started so
        // headers + binary body go out cleanly. Stop if a buffer refuses to be
        // removed, otherwise this loop would never terminate.
        while (ob_get_level() > 0) {
            if (!@ob_end_clean()) break;
        }

        header('Content-Type: application/gzip');
        header('Content-Disposition: attachment; filename="' . $downloadName . '"');
        header('Content-Length: ' . $size);
        header('Cache-Control: no-store, no-cache, must-revalidate, private');
        header('Pragma: no-cache');
        header('X-Content-Type-Options: nosniff');

        while (!feof($fp)) {
            $chunk = fread($fp, 1024 * 256);
            if ($chunk === false) break;
            echo $chunk;
            @flush();
            // ignore_user_abort(true) keeps the script alive after a disconnect so
            // cleanup runs; there is no point reading the rest of the file though.
            if (connection_aborted()) break;
        }
        fclose($fp);
        @unlink($tmpFile);
        exit;
    }

    /**
     * Remove leftover temp archives from prior runs that died before unlink.
     * Anything matching our prefix older than TMP_STALE_AGE is fair game.
     */
    protected function sweepStaleTempFiles()
    {
        global $conf;
        $tmpDir = $conf['tmpdir'] ?? null;
        if (!$tmpDir || !is_dir($tmpDir)) return;

        $cutoff = time() - self::TMP_STALE_AGE;
        $pattern = $tmpDir . '/' . self::TMP_PREFIX . '*';
        // glob() may return false; the array cast turns that into [false], which
        // the is_file() check below discards.
        foreach ((array) @glob($pattern) as $stale) {
            if (!is_file($stale)) continue;
            $mtime = @filemtime($stale);
            if ($mtime !== false && $mtime < $cutoff) {
                @unlink($stale);
            }
        }
    }
}