<?php

if (!defined('DOKU_INC')) die();

/**
 * Site Backup admin plugin for DokuWiki.
 *
 * Streams a tar.gz of selected wiki parts (pages, media, conf, lib/plugins, lib/tpl)
 * to the admin's browser. The archive is built in data/tmp/ with a random filename,
 * streamed out, and deleted immediately. Nothing persists on the server.
 *
 * Security model:
 *  - Admin-only: DokuWiki's AdminPlugin framework enforces auth_isadmin() before
 *    handle()/html() are invoked because forAdminOnly() returns true. A second
 *    explicit check inside streamArchive() guards against any framework bypass.
 *  - The temp archive lives in $conf['tmpdir'] (data/tmp/), which DokuWiki ships
 *    with a deny-all .htaccess; it cannot be fetched directly even if the path
 *    were known.
 *  - Filename uses 64 bits of CSPRNG randomness, file is chmod'd to 0600, and is
 *    deleted both at the natural end of streamArchive() and via a shutdown
 *    function in case the connection is aborted partway.
 *  - Stale temp files from previous runs (older than 1 hour) are swept on each
 *    invocation, so even a crash-during-stream leaves nothing for long.
 *
 * Treat downloaded archives as credentials: they may include conf/users.auth.php
 * (password hashes), ACL rules, and any secrets stored in conf/local.php.
 */

use dokuwiki\Extension\AdminPlugin;
use dokuwiki\Form\Form;
use splitbrain\PHPArchive\Archive;
use splitbrain\PHPArchive\ArchiveIllegalCompressionException;
use splitbrain\PHPArchive\ArchiveIOException;

// PatchedTar fixes splitbrain/php-archive PR #38 (mtime bug) for the version
// of the library vendored with DokuWiki Librarian.
require_once __DIR__ . '/PatchedTar.php';
use dokuwiki\plugin\sitebackup\PatchedTar as Tar;

class admin_plugin_sitebackup extends AdminPlugin
{
    /** Prefix used for the temp archive filename in data/tmp/. */
    public const TMP_PREFIX = 'sitebackup_tmp_';

    /** Max age (seconds) of leftover temp files before sweep removes them. */
    public const TMP_STALE_AGE = 3600;

    /** @var array list of [absolute path, archive-relative path, size] of files to include */
    protected $fileList = [];

    /** @var int total uncompressed size of selected files */
    protected $totalBytes = 0;

    /**
     * Restrict the plugin to wiki administrators.
     *
     * @return bool always true
     */
    public function forAdminOnly(): bool
    {
        return true;
    }

    /**
     * Sort key for this plugin's entry in the admin menu.
     *
     * @return int
     */
    public function getMenuSort(): int
    {
        return 1000;
    }

    /**
     * Dispatch based on the submitted action.
     * Valid actions: "preview" (build file list, render summary table),
     *                "download" (build archive, stream as tar.gz).
     *
     * Requests without an action, with an invalid security token, or with an
     * unknown action are ignored and only the form is rendered.
     */
    public function handle(): void
    {
        global $INPUT;

        // Sweep stale temp files from previous runs every time we enter the page.
        $this->sweepStaleTempFiles();

        if (!$INPUT->has('sitebackup_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('sitebackup_action');
        if ($action !== 'preview' && $action !== 'download') return;

        // Download MUST be POST. Refuse GET / HEAD / etc. so a stray link, browser
        // prefetch, or curious co-admin pasting a URL can't trigger a backup.
        if ($action === 'download' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
            msg('Site Backup: download must be submitted via POST.', -1);
            return;
        }

        $this->collectFiles();

        if ($action === 'download') {
            $this->streamArchive();
            // streamArchive() exits on success. If it returns, an error was shown
            // via msg() and we fall through to html() so the user sees the form.
        }
    }

    /**
     * Render the admin page: intro, form, and (if $fileList is populated) preview table.
     */
    public function html(): void
    {
        echo '<h1>Site Backup</h1>';
        echo '<p>Select what to include, click <em>Preview</em> to see the file list and total size, '
            . 'then <em>Download tar.gz</em> to receive the archive in your browser.</p>';
        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
            . '<strong>Sensitive content warning.</strong> The archive may contain password hashes '
            . '(<code>conf/users.auth.php</code>), ACL rules, and any secrets stored in '
            . '<code>conf/local.php</code> (DB credentials, SMTP passwords, API keys). '
            . 'Treat the download like a credential.'
            . '</p>';

        $this->renderForm();

        if ($this->fileList) {
            $this->renderPreview();
        }
    }

    /* ----------------------------------------------------------------- *
     * Form
     * ----------------------------------------------------------------- */

    /**
     * Render the selection form with checkboxes for each backup section.
     *
     * On first load the built-in defaults are used; once the form has been
     * submitted, the checkbox state is taken from the request so the user's
     * selection survives the round trip.
     */
    protected function renderForm(): void
    {
        global $INPUT;

        $hasSubmitted = $INPUT->has('sitebackup_action');
        $defaults = [
            'pages'       => true,
            'media'       => true,
            'meta'        => true,
            'media_meta'  => true,
            'attic'       => false,
            'media_attic' => false,
            'index'       => false,
            'conf'        => true,
            'plugins'     => true,
            'tpl'         => true,
        ];
        $sel = [];
        foreach ($defaults as $k => $def) {
            $sel[$k] = $hasSubmitted ? $INPUT->bool('sb_' . $k, false) : $def;
        }

        $form = new Form(['method' => 'POST', 'id' => 'sitebackup_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'sitebackup');

        $style = 'text-align: left; padding: 0 1em .5em 1em; margin: 1em 0;';

        $form->addFieldsetOpen('Wiki content')->attr('style', $style);
        $this->addCheckboxRow($form, 'sb_pages', 'Pages (data/pages)', $sel['pages']);
        $this->addCheckboxRow($form, 'sb_media', 'Media files (data/media)', $sel['media']);
        $this->addCheckboxRow($form, 'sb_meta', 'Page metadata (data/meta)', $sel['meta']);
        $this->addCheckboxRow($form, 'sb_media_meta', 'Media metadata (data/media_meta)', $sel['media_meta']);
        $this->addCheckboxRow($form, 'sb_attic', 'Page revisions (data/attic) - can be large', $sel['attic']);
        $this->addCheckboxRow($form, 'sb_media_attic', 'Media revisions (data/media_attic)', $sel['media_attic']);
        $this->addCheckboxRow($form, 'sb_index', 'Search index (data/index) - rebuildable', $sel['index']);
        $form->addFieldsetClose();

        $form->addFieldsetOpen('Configuration & code')->attr('style', $style);
        $this->addCheckboxRow($form, 'sb_conf', 'Configuration (conf/) - includes secrets', $sel['conf']);
        $this->addCheckboxRow($form, 'sb_plugins', 'Plugins source (lib/plugins/)', $sel['plugins']);
        $this->addCheckboxRow($form, 'sb_tpl', 'Templates source (lib/tpl/)', $sel['tpl']);
        $form->addFieldsetClose();

        $form->addTagOpen('p');
        $form->addButton('sitebackup_action', 'Preview')->val('preview');
        $form->addHTML(' ');
        $form->addButton('sitebackup_action', 'Download tar.gz')->val('download');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /**
     * Add a labelled checkbox row to the form.
     *
     * @param Form   $form
     * @param string $name    field name
     * @param string $label   display label
     * @param bool   $checked whether the checkbox is pre-checked
     */
    protected function addCheckboxRow(Form $form, string $name, string $label, bool $checked): void
    {
        $form->addTagOpen('div')->attr('style', 'margin:.4em 0;');
        $cb = $form->addCheckbox($name, ' ' . $label);
        $cb->val('1');
        if ($checked) $cb->attr('checked', 'checked');
        $form->addTagClose('div');
    }

    /* ----------------------------------------------------------------- *
     * File collection
     * ----------------------------------------------------------------- */

    /**
     * Build $this->fileList from the selected checkboxes in the current request.
     */
    protected function collectFiles(): void
    {
        global $INPUT, $conf;

        // Use $conf[...] for the data dirs so relocated savedir installs still work.
        $roots = [
            'sb_pages'       => [$conf['datadir'], 'data/pages'],
            'sb_media'       => [$conf['mediadir'], 'data/media'],
            'sb_meta'        => [$conf['metadir'], 'data/meta'],
            'sb_media_meta'  => [$conf['mediametadir'], 'data/media_meta'],
            'sb_attic'       => [$conf['olddir'], 'data/attic'],
            'sb_media_attic' => [$conf['mediaolddir'], 'data/media_attic'],
            'sb_index'       => [$conf['indexdir'], 'data/index'],
            'sb_conf'        => [rtrim(DOKU_CONF, '/'), 'conf'],
            'sb_plugins'     => [rtrim(DOKU_PLUGIN, '/'), 'lib/plugins'],
            'sb_tpl'         => [DOKU_INC . 'lib/tpl', 'lib/tpl'],
        ];

        foreach ($roots as $field => $pair) {
            if (!$INPUT->bool($field, false)) continue;
            [$srcAbs, $archiveRel] = $pair;
            $this->walkInto($srcAbs, $archiveRel);
        }
    }

    /**
     * Recursively enumerate all readable files under $srcAbs and append them to $this->fileList.
     *
     * Unreadable subdirectories are skipped instead of aborting the whole walk.
     *
     * @param string $srcAbs     absolute filesystem path (file or directory)
     * @param string $archiveRel path prefix to use inside the archive
     */
    protected function walkInto(string $srcAbs, string $archiveRel): void
    {
        if (!file_exists($srcAbs)) return;

        if (is_file($srcAbs)) {
            $this->appendFile($srcAbs, $archiveRel);
            return;
        }

        try {
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $srcAbs,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::LEAVES_ONLY,
                // Descending into an unreadable directory throws from getChildren()
                // while the foreach advances, i.e. outside the per-file try below.
                // This flag makes the iterator skip such directories instead.
                RecursiveIteratorIterator::CATCH_GET_CHILD
            );
        } catch (Exception $e) {
            // Root directory itself could not be opened: nothing to add.
            return;
        }

        $srcRoot = rtrim($srcAbs, '/');
        $rootLen = strlen($srcRoot) + 1; // +1 skips the separator after the root
        foreach ($it as $info) {
            try {
                if (!$info->isFile() || !$info->isReadable()) continue;
                $abs = $info->getPathname();
                $rel = str_replace('\\', '/', substr($abs, $rootLen));

                if ($this->isIgnored($archiveRel, $rel)) continue;

                $this->appendFile($abs, $archiveRel . '/' . $rel);
            } catch (Exception $e) {
                // A single unstat-able entry must not abort the enumeration.
                continue;
            }
        }
    }

    /**
     * Return true if a file should be excluded from the archive.
     * Hardcoded (no config) to keep the plugin small.
     *
     * @param string $archiveRel top-level archive branch, e.g. "conf" or "lib/plugins"
     * @param string $rel        path within that branch
     * @return bool
     */
    protected function isIgnored(string $archiveRel, string $rel): bool
    {
        $base = basename($rel);

        // Universal noise.
        if ($base === '_dummy') return true;
        if ($base === '.DS_Store') return true;
        if ($base === 'Thumbs.db') return true;

        // Belt-and-suspenders: never include our own scratch files even if
        // someone pointed savedir at an unusual location.
        if (str_starts_with($base, self::TMP_PREFIX)) return true;

        // Skip VCS metadata anywhere in any branch. Local clones / checkouts
        // can be huge and aren't part of "live" state.
        $segments = explode('/', $rel);
        foreach ($segments as $seg) {
            if ($seg === '.git') return true;
            if ($seg === '.svn') return true;
            if ($seg === '.hg') return true;
        }

        // conf/ branch: drop *.dist / *.example / *.bak sample files. They're
        // shipped with DokuWiki and templates, not real configuration.
        if ($archiveRel === 'conf') {
            if (preg_match('/\.(dist|example|bak)$/i', $base)) return true;
        }

        return false;
    }

    /**
     * Append a single file entry to the file list and add its size to the running total.
     *
     * @param string $abs        absolute filesystem path
     * @param string $archiveRel path inside the archive
     */
    protected function appendFile(string $abs, string $archiveRel): void
    {
        $size = filesize($abs);
        if ($size === false) $size = 0; // size unknown: still list the file
        $this->fileList[] = [$abs, $archiveRel, $size];
        $this->totalBytes += $size;
    }

    /* ----------------------------------------------------------------- *
     * Preview
     * ----------------------------------------------------------------- */

    /**
     * Render a summary table grouping files by top-level archive section.
     *
     * Sections match the archive branches used by collectFiles(): two path
     * segments for "data/..." and "lib/..." (e.g. "data/pages", "lib/tpl") and
     * a single segment for everything else (e.g. "conf").
     */
    protected function renderPreview(): void
    {
        echo '<h2>Preview</h2>';
        echo '<p>' . count($this->fileList) . ' files, '
            . hsc($this->humanBytes($this->totalBytes)) . ' uncompressed.</p>';

        $perRoot = [];
        foreach ($this->fileList as [, $rel, $size]) {
            $parts = explode('/', $rel, 3);
            // Only the data/ and lib/ branches are two levels deep. Treating
            // "conf" the same way would create one row per configuration file.
            $twoLevel = ($parts[0] === 'data' || $parts[0] === 'lib') && isset($parts[1]);
            $top = $twoLevel ? ($parts[0] . '/' . $parts[1]) : $parts[0];
            if (!isset($perRoot[$top])) $perRoot[$top] = ['count' => 0, 'bytes' => 0];
            $perRoot[$top]['count']++;
            $perRoot[$top]['bytes'] += $size;
        }
        ksort($perRoot);

        echo '<table class="inline"><thead><tr><th>Section</th><th style="text-align:right;">Files</th><th style="text-align:right;">Size</th></tr></thead><tbody>';
        foreach ($perRoot as $section => $stats) {
            echo '<tr><td><code>' . hsc($section) . '</code></td>'
                . '<td style="text-align:right;">' . (int)$stats['count'] . '</td>'
                . '<td style="text-align:right;">' . hsc($this->humanBytes($stats['bytes'])) . '</td></tr>';
        }
        echo '</tbody></table>';
        echo '<p>Click <em>Download tar.gz</em> above to create and download the archive '
            . '(compressed size will typically be smaller).</p>';
    }

    /**
     * Format a byte count as a human-readable string (B, KiB, MiB, GiB, TiB).
     *
     * Plain bytes are printed as an integer; larger units with two decimals.
     *
     * @param int $bytes
     * @return string
     */
    protected function humanBytes(int $bytes): string
    {
        $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
        $last = count($units) - 1;
        $i = 0;
        $n = (float)$bytes;
        while ($n >= 1024 && $i < $last) {
            $n /= 1024;
            $i++;
        }
        return sprintf($i === 0 ? '%d %s' : '%.2f %s', $n, $units[$i]);
    }

    /* ----------------------------------------------------------------- *
     * Archive creation + streaming
     * ----------------------------------------------------------------- */

    /**
     * Build the archive in data/tmp/, stream it to the browser as a tar.gz download,
     * and exit. Returns without exiting only when an error prevents streaming, so the
     * caller can fall through to html() and display the form again.
     */
    protected function streamArchive(): void
    {
        global $conf, $INPUT;

        // Defense-in-depth: AdminPlugin framework should have blocked non-admins
        // before we got here, but verify directly anyway.
        if (!auth_isadmin()) {
            msg('Site Backup: admin access required.', -1);
            return;
        }

        if (!$this->fileList) {
            msg('Site Backup: nothing selected.', -1);
            return;
        }

        set_time_limit(0);
        ignore_user_abort(true);
        // Only ever raise the limit; a blind ini_set() could lower a larger
        // (or unlimited) server setting.
        $this->raiseMemoryLimit('256M');

        $tmpDir = $conf['tmpdir'];
        if (!is_dir($tmpDir) || !is_writable($tmpDir)) {
            msg('Site Backup: temp directory is not writable: ' . hsc($tmpDir), -1);
            return;
        }

        // Build a hard-to-guess filename. 16 hex chars = 64 bits of entropy from
        // a CSPRNG. The file also lives under data/.htaccess deny-all so even a
        // guess wouldn't be enough.
        $host = $INPUT->server->str('HTTP_HOST', 'wiki');
        $host = preg_replace('/[^a-zA-Z0-9._-]+/', '_', $host);
        $stamp = date('Ymd-His');
        $archiveDir = $host . '-backup-' . $stamp;   // dir inside the tar
        $downloadName = $archiveDir . '.tar.gz';     // browser filename
        $tmpFile = $tmpDir . '/' . self::TMP_PREFIX . bin2hex(random_bytes(8)) . '.tar.gz';

        // Guarantee the temp file is deleted even on connection abort, fatal
        // error, or `exit` from within the streaming loop.
        register_shutdown_function(static function () use ($tmpFile) {
            if (is_file($tmpFile)) unlink($tmpFile);
        });

        $oldUmask = umask(0077);

        try {
            $tar = new Tar();
            $tar->setCompression(6, Archive::COMPRESS_GZIP);
            $tar->create($tmpFile);

            // Belt-and-suspenders: explicitly chmod once created, in case the
            // umask wasn't honored (some filesystems / wrappers ignore it).
            chmod($tmpFile, 0600);

            foreach ($this->fileList as [$abs, $rel]) {
                try {
                    $tar->addFile($abs, $archiveDir . '/' . $rel);
                } catch (Exception $e) {
                    // Skip individual broken files rather than failing the whole backup.
                    continue;
                }
            }
            $tar->close();
        } catch (ArchiveIOException | ArchiveIllegalCompressionException $e) {
            if (is_file($tmpFile)) unlink($tmpFile);
            msg('Site Backup: could not create archive: ' . hsc($e->getMessage()), -1);
            return;
        } finally {
            // Restore the umask on every exit path, including unexpected exceptions.
            umask($oldUmask);
        }

        // The archive was written by the library; drop any cached stat data so
        // the size used for Content-Length is the final on-disk size.
        clearstatcache(true, $tmpFile);
        $size = is_file($tmpFile) ? filesize($tmpFile) : false;
        if ($size === false || $size === 0) {
            if (is_file($tmpFile)) unlink($tmpFile);
            msg('Site Backup: archive was empty or could not be written.', -1);
            return;
        }

        // Open before any header is sent so a failure can still be reported as
        // a normal error message instead of an empty "download".
        $fp = fopen($tmpFile, 'rb');
        if ($fp === false) {
            unlink($tmpFile);
            msg('Site Backup: could not read the archive for download.', -1);
            return;
        }

        // Clear any output buffering DokuWiki / extensions may have started so
        // headers + binary body go out cleanly.
        while (ob_get_level() > 0) {
            ob_end_clean();
        }

        header('Content-Type: application/gzip');
        header('Content-Disposition: attachment; filename="' . $downloadName . '"');
        header('Content-Length: ' . $size);
        header('Cache-Control: no-store, no-cache, must-revalidate, private');
        header('Pragma: no-cache');
        header('X-Content-Type-Options: nosniff');

        while (!feof($fp)) {
            $chunk = fread($fp, 1024 * 256);
            if ($chunk === false) break;
            echo $chunk;
            flush();
            // ignore_user_abort(true) keeps the script alive after a disconnect
            // (so cleanup runs); stop reading once nobody is listening.
            if (connection_aborted()) break;
        }
        fclose($fp);
        unlink($tmpFile);
        exit;
    }

    /**
     * Raise PHP's memory_limit to at least $wanted, never lowering it.
     *
     * @param string $wanted limit in php.ini shorthand notation, e.g. "256M"
     */
    protected function raiseMemoryLimit(string $wanted): void
    {
        $current = ini_get('memory_limit');
        if ($current !== false && $current !== '') {
            if ((int)$current === -1) return; // unlimited already
            if ($this->iniBytes($current) >= $this->iniBytes($wanted)) return;
        }
        ini_set('memory_limit', $wanted);
    }

    /**
     * Convert a php.ini shorthand size ("128M", "1G", "65536") to bytes.
     *
     * @param string $value
     * @return int
     */
    protected function iniBytes(string $value): int
    {
        $value = trim($value);
        $bytes = (int)$value;
        // Deliberate fall-through: each larger unit multiplies once more.
        switch (strtolower(substr($value, -1))) {
            case 'g':
                $bytes *= 1024;
                // no break
            case 'm':
                $bytes *= 1024;
                // no break
            case 'k':
                $bytes *= 1024;
        }
        return $bytes;
    }

    /**
     * Remove leftover temp archives from prior runs that died before unlink.
     * Anything matching our prefix older than TMP_STALE_AGE is fair game.
     */
    protected function sweepStaleTempFiles(): void
    {
        global $conf;
        $tmpDir = $conf['tmpdir'] ?? null;
        if (!$tmpDir || !is_dir($tmpDir)) return;

        $cutoff = time() - self::TMP_STALE_AGE;
        $pattern = $tmpDir . '/' . self::TMP_PREFIX . '*';
        // glob() returns false on error; the cast turns that into [false],
        // which the is_file() check below filters out.
        foreach ((array) glob($pattern) as $stale) {
            if (!is_file($stale)) continue;
            $mtime = filemtime($stale);
            if ($mtime !== false && $mtime < $cutoff) {
                unlink($stale);
            }
        }
    }
}