<?php
if (!defined('DOKU_INC')) die();

/**
 * Site Backup admin plugin for DokuWiki.
 *
 * Streams a tar.gz of selected wiki parts (pages, media, conf, lib/plugins, lib/tpl)
 * to the admin's browser. The archive is built in data/tmp/ with a random filename,
 * streamed out, and deleted immediately. Nothing persists on the server.
 *
 * Security model:
 *  - Admin-only: DokuWiki's AdminPlugin framework enforces auth_isadmin() before
 *    handle()/html() are invoked because forAdminOnly() returns true. A second
 *    explicit check inside streamArchive() guards against any framework bypass.
 *  - The temp archive lives in $conf['tmpdir'] (data/tmp/), which DokuWiki ships
 *    with a deny-all .htaccess; it cannot be fetched directly even if the path
 *    were known.
 *  - Filename uses 64 bits of CSPRNG randomness, file is chmod'd to 0600, and is
 *    deleted both at the natural end of streamArchive() and via a shutdown
 *    function in case the connection is aborted partway.
 *  - Stale temp files from previous runs (older than 1 hour) are swept on each
 *    invocation, so even a crash-during-stream leaves nothing for long.
 *
 * Treat downloaded archives as credentials: they may include conf/users.auth.php
 * (password hashes), ACL rules, and any secrets stored in conf/local.php.
 */

use dokuwiki\Extension\AdminPlugin;
use dokuwiki\Form\Form;
use splitbrain\PHPArchive\Archive;
use splitbrain\PHPArchive\ArchiveIOException;

// PatchedTar fixes splitbrain/php-archive PR #38 (mtime bug) for the version
// of the library vendored with DokuWiki Librarian.
// The class lives in PatchedTar.php and is autoloaded via DokuWiki's PSR-4 loader
// (dokuwiki\plugin\sitebackup namespace -> lib/plugins/sitebackup/).
use dokuwiki\plugin\sitebackup\PatchedTar as Tar;

class admin_plugin_sitebackup extends AdminPlugin
{
    /** Prefix used for the temp archive filename in data/tmp/. */
    public const TMP_PREFIX = 'sitebackup_tmp_';

    /** Max age (seconds) of leftover temp files before sweep removes them. */
    public const TMP_STALE_AGE = 3600;

    /** @var array list of [absolute path, archive-relative path, size] of files to include */
    protected $fileList = [];

    /** @var int total uncompressed size of selected files */
    protected $totalBytes = 0;

    /**
     * Tracks real paths already added to the archive to prevent double-inclusion
     * via symlinks pointing to the same file.
     *
     * @var array<string, true>
     */
    protected $visitedPaths = [];

    /**
     * @return bool
     */
    public function forAdminOnly(): bool
    {
        return true;
    }

    /**
     * @return int
     */
    public function getMenuSort(): int
    {
        return 1000;
    }

    /**
     * Dispatch based on the submitted action.
     * Valid actions: "preview" (build file list, render summary table),
     *                "download" (build archive, stream as tar.gz).
     */
    public function handle(): void
    {
        global $INPUT;

        // Sweep stale temp files from previous runs every time we enter the page.
        $this->sweepStaleTempFiles();

        if (!$INPUT->has('sitebackup_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('sitebackup_action');
        if ($action !== 'preview' && $action !== 'download') return;

        // Download MUST be POST. Refuse GET / HEAD / etc. so a stray link, browser
        // prefetch, or curious co-admin pasting a URL can't trigger a backup.
        if ($action === 'download' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
            msg($this->getLang('err_post'), -1);
            return;
        }

        $this->collectFiles();

        if ($action === 'download') {
            $this->streamArchive();
            // streamArchive() exits on success. If it returns, an error was shown
            // via msg() and we fall through to html() so the user sees the form.
        }
    }

    /**
     * Render the admin page: intro, form, and (if $fileList is populated) preview table.
     */
    public function html(): void
    {
        echo '<h1>' . hsc($this->getLang('menu')) . '</h1>';
        echo '<p>' . $this->getLang('intro') . '</p>';
        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
            . '<strong>' . hsc($this->getLang('warn_title')) . '</strong> '
            . $this->getLang('warn_body')
            . '</p>';

        $this->renderForm();

        if ($this->fileList) {
            $this->renderPreview();
        }
    }

    /* ----------------------------------------------------------------- *
     * Form
     * ----------------------------------------------------------------- */

    /**
     * Render the selection form with checkboxes for each backup section.
     *
     * On first display the built-in defaults are used; once the form has been
     * submitted, each checkbox reflects the submitted "sb_<key>" value.
     */
    protected function renderForm(): void
    {
        global $INPUT;

        $hasSubmitted = $INPUT->has('sitebackup_action');

        // Default state per option key, grouped by the fieldset it is shown in.
        // Field name is "sb_<key>", label is the "opt_<key>" language string.
        $contentDefaults = [
            'pages' => true,
            'media' => true,
            'meta' => true,
            'media_meta' => true,
            'attic' => false,
            'media_attic' => false,
            'index' => false,
        ];
        $codeDefaults = [
            'conf' => true,
            'plugins' => true,
            'tpl' => true,
        ];

        $form = new Form(['method' => 'POST', 'id' => 'sitebackup_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'sitebackup');

        $style = 'text-align: left; padding: 0 1em .5em 1em; margin: 1em 0;';

        $fieldsets = [
            'fs_content' => $contentDefaults,
            'fs_code' => $codeDefaults,
        ];
        foreach ($fieldsets as $legendKey => $defaults) {
            $form->addFieldsetOpen($this->getLang($legendKey))->attr('style', $style);
            foreach ($defaults as $key => $default) {
                $checked = $hasSubmitted ? $INPUT->bool('sb_' . $key, false) : $default;
                $this->addCheckboxRow($form, 'sb_' . $key, $this->getLang('opt_' . $key), $checked);
            }
            $form->addFieldsetClose();
        }

        $form->addTagOpen('p');
        $form->addButton('sitebackup_action', $this->getLang('btn_preview'))->val('preview');
        $form->addHTML(' ');
        $form->addButton('sitebackup_action', $this->getLang('btn_download'))->val('download');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /**
     * Add a labelled checkbox row to the form.
     *
     * @param Form   $form
     * @param string $name    field name
     * @param string $label   display label
     * @param bool   $checked whether the checkbox is pre-checked
     */
    protected function addCheckboxRow(Form $form, string $name, string $label, bool $checked): void
    {
        $form->addTagOpen('div')->attr('style', 'margin:.4em 0;');
        $cb = $form->addCheckbox($name, ' ' . $label);
        $cb->val('1');
        if ($checked) $cb->attr('checked', 'checked');
        $form->addTagClose('div');
    }

    /* ----------------------------------------------------------------- *
     * File collection
     * ----------------------------------------------------------------- */

    /**
     * Build $this->fileList from the selected checkboxes in the current request.
     *
     * Resets the file list, the byte total and the visited-path set before walking
     * each selected source directory.
     */
    protected function collectFiles(): void
    {
        global $INPUT, $conf;

        $this->fileList = [];
        $this->totalBytes = 0;
        $this->visitedPaths = [];

        // Use $conf[...] for the data dirs so relocated savedir installs still work.
        $roots = [
            'sb_pages'       => [$conf['datadir'],        'data/pages'],
            'sb_media'       => [$conf['mediadir'],       'data/media'],
            'sb_meta'        => [$conf['metadir'],        'data/meta'],
            'sb_media_meta'  => [$conf['mediametadir'],   'data/media_meta'],
            'sb_attic'       => [$conf['olddir'],         'data/attic'],
            'sb_media_attic' => [$conf['mediaolddir'],    'data/media_attic'],
            'sb_index'       => [$conf['indexdir'],       'data/index'],
            'sb_conf'        => [rtrim(DOKU_CONF, '/'),   'conf'],
            'sb_plugins'     => [rtrim(DOKU_PLUGIN, '/'), 'lib/plugins'],
            'sb_tpl'         => [DOKU_INC . 'lib/tpl',    'lib/tpl'],
        ];

        foreach ($roots as $field => $pair) {
            if (!$INPUT->bool($field, false)) continue;
            [$srcAbs, $archiveRel] = $pair;
            $this->walkInto($srcAbs, $archiveRel);
        }
    }

    /**
     * Recursively enumerate all readable files under $srcAbs and append them to $this->fileList.
     *
     * Best-effort: a source that does not exist or cannot be opened is skipped, and
     * unreadable subdirectories are skipped instead of aborting the whole walk.
     *
     * @param string $srcAbs     absolute filesystem path (file or directory)
     * @param string $archiveRel path prefix to use inside the archive
     */
    protected function walkInto(string $srcAbs, string $archiveRel): void
    {
        if (!file_exists($srcAbs)) return;

        if (is_file($srcAbs)) {
            $this->appendFile($srcAbs, $archiveRel);
            return;
        }

        try {
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $srcAbs,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::LEAVES_ONLY,
                // Descending into an unreadable subdirectory throws from the iterator's
                // next(), i.e. outside the per-file try below. This flag makes the
                // iterator skip such directories instead.
                RecursiveIteratorIterator::CATCH_GET_CHILD
            );
        } catch (Exception $e) {
            return;
        }

        $srcRoot = rtrim($srcAbs, '/');
        $rootLen = strlen($srcRoot) + 1;
        foreach ($it as $info) {
            try {
                if (!$info->isFile() || !$info->isReadable()) continue;

                $abs = $info->getPathname();
                $rel = str_replace('\\', '/', substr($abs, $rootLen));

                // Check the ignore list BEFORE recording the real path: an ignored
                // symlink (e.g. conf/x.bak -> local.php) must not mark its target as
                // "already included" and thereby drop the real file from the backup.
                if ($this->isIgnored($archiveRel, $rel)) continue;

                // Skip files already included via a different symlink path.
                $realPath = $info->getRealPath();
                if ($realPath === false) continue;
                if (isset($this->visitedPaths[$realPath])) continue;
                $this->visitedPaths[$realPath] = true;

                $this->appendFile($abs, $archiveRel . '/' . $rel);
            } catch (Exception $e) {
                continue;
            }
        }
    }

    /**
     * Return true if a file should be excluded from the archive.
     * Hardcoded (no config) to keep the plugin small.
     *
     * @param string $archiveRel top-level archive branch, e.g. "conf" or "lib/plugins"
     * @param string $rel        path within that branch
     * @return bool
     */
    protected function isIgnored(string $archiveRel, string $rel): bool
    {
        $base = basename($rel);

        // Universal noise.
        if (in_array($base, ['_dummy', '.DS_Store', 'Thumbs.db'], true)) return true;

        // Belt-and-suspenders: never include our own scratch files even if
        // someone pointed savedir at an unusual location.
        if (str_starts_with($base, self::TMP_PREFIX)) return true;

        // Skip VCS metadata anywhere in any branch. Local clones / checkouts
        // can be huge and aren't part of "live" state.
        foreach (explode('/', $rel) as $seg) {
            if (in_array($seg, ['.git', '.svn', '.hg'], true)) return true;
        }

        // conf/ branch: drop *.dist / *.example / *.bak sample files. They're
        // shipped with DokuWiki and templates, not real configuration.
        if ($archiveRel === 'conf' && preg_match('/\.(dist|example|bak)$/i', $base)) {
            return true;
        }

        return false;
    }

    /**
     * Append a single file entry to the file list and add its size to the total.
     * A file whose size cannot be determined is counted as 0 bytes.
     *
     * @param string $abs        absolute filesystem path
     * @param string $archiveRel path inside the archive
     */
    protected function appendFile(string $abs, string $archiveRel): void
    {
        $size = filesize($abs);
        if ($size === false) $size = 0;
        $this->fileList[] = [$abs, $archiveRel, $size];
        $this->totalBytes += $size;
    }

    /* ----------------------------------------------------------------- *
     * Preview
     * ----------------------------------------------------------------- */

    /**
     * Render a summary table grouping files by top-level archive section.
     */
    protected function renderPreview(): void
    {
        echo '<h2>' . hsc($this->getLang('preview_head')) . '</h2>';
        echo '<p>' . sprintf(
            $this->getLang('preview_summary'),
            count($this->fileList),
            hsc($this->humanBytes($this->totalBytes))
        ) . '</p>';

        $perRoot = [];
        foreach ($this->fileList as [, $rel, $size]) {
            $top = $this->sectionOf($rel);
            if (!isset($perRoot[$top])) $perRoot[$top] = ['count' => 0, 'bytes' => 0];
            $perRoot[$top]['count']++;
            $perRoot[$top]['bytes'] += $size;
        }
        ksort($perRoot);

        echo '<table class="inline"><thead><tr>'
            . '<th>' . hsc($this->getLang('col_section')) . '</th>'
            . '<th style="text-align:right;">' . hsc($this->getLang('col_files')) . '</th>'
            . '<th style="text-align:right;">' . hsc($this->getLang('col_size')) . '</th>'
            . '</tr></thead><tbody>';
        foreach ($perRoot as $section => $stats) {
            echo '<tr><td><code>' . hsc($section) . '</code></td>'
                . '<td style="text-align:right;">' . (int)$stats['count'] . '</td>'
                . '<td style="text-align:right;">' . hsc($this->humanBytes($stats['bytes'])) . '</td></tr>';
        }
        echo '</tbody></table>';
        echo '<p>' . $this->getLang('preview_hint') . '</p>';
    }

    /**
     * Map an archive-relative path to the section it is summarised under.
     *
     * The "conf" branch is a single path segment; every other branch built by
     * collectFiles() ("data/pages", "lib/plugins", ...) is two segments. Without the
     * special case, each file directly under conf/ would become its own table row.
     *
     * @param string $rel archive-relative path, e.g. "conf/local.php"
     * @return string section name, e.g. "conf" or "data/pages"
     */
    protected function sectionOf(string $rel): string
    {
        $parts = explode('/', $rel, 3);
        if ($parts[0] === 'conf' || !isset($parts[1])) {
            return $parts[0];
        }
        return $parts[0] . '/' . $parts[1];
    }

    /**
     * Format a byte count as a human-readable string (B, KiB, MiB, GiB, TiB).
     *
     * @param int $bytes
     * @return string
     */
    protected function humanBytes(int $bytes): string
    {
        $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
        $i = 0;
        $n = (float)$bytes;
        while ($n >= 1024 && $i < count($units) - 1) {
            $n /= 1024;
            $i++;
        }
        return sprintf($i === 0 ? '%d %s' : '%.2f %s', $n, $units[$i]);
    }

    /* ----------------------------------------------------------------- *
     * Archive creation + streaming
     * ----------------------------------------------------------------- */

    /**
     * Build the archive in data/tmp/, stream it to the browser as a tar.gz download,
     * and exit. Returns without exiting only when an error prevents streaming, so the
     * caller can fall through to html() and display the form again.
     *
     * No response header is sent until the archive has been built, sized and opened
     * successfully, so every failure path can still render a normal HTML page.
     */
    protected function streamArchive(): void
    {
        global $conf, $INPUT;

        // Defense-in-depth: AdminPlugin framework should have blocked non-admins
        // before we got here, but verify directly anyway.
        if (!auth_isadmin()) {
            msg($this->getLang('err_admin'), -1);
            return;
        }

        if (!$this->fileList) {
            msg($this->getLang('err_empty'), -1);
            return;
        }

        set_time_limit(0);
        ignore_user_abort(true);

        // Only raise the memory limit, never lower it.
        $rawLimit = (string)ini_get('memory_limit');
        $unit = strtolower(substr($rawLimit, -1));
        $limitVal = (int)$rawLimit;
        switch ($unit) {
            case 'g':
                $limitBytes = $limitVal * 1073741824;
                break;
            case 'm':
                $limitBytes = $limitVal * 1048576;
                break;
            case 'k':
                $limitBytes = $limitVal * 1024;
                break;
            default:
                $limitBytes = $limitVal;
                break;
        }
        // -1 means "unlimited" and must be left alone.
        if ($limitBytes !== -1 && $limitBytes < 268435456) {
            ini_set('memory_limit', '256M');
        }

        $tmpDir = $conf['tmpdir'];
        if (!is_dir($tmpDir) || !is_writable($tmpDir)) {
            msg(sprintf($this->getLang('err_tmp'), hsc($tmpDir)), -1);
            return;
        }

        // Build a hard-to-guess filename. 16 hex chars = 64 bits of entropy from
        // a CSPRNG. The file also lives under data/.htaccess deny-all so even a
        // guess wouldn't be enough.
        $host = $INPUT->server->str('HTTP_HOST', 'wiki');
        $host = preg_replace('/[^a-zA-Z0-9._-]+/', '_', $host);
        $stamp = date('Ymd-His');
        $archiveDir = $host . '-backup-' . $stamp;  // dir inside the tar
        $downloadName = $archiveDir . '.tar.gz';    // browser filename
        $tmpFile = $tmpDir . '/' . self::TMP_PREFIX . bin2hex(random_bytes(8)) . '.tar.gz';

        // Guarantee the temp file is deleted even on connection abort, fatal
        // error, or `exit` from within the streaming loop.
        register_shutdown_function(function () use ($tmpFile) {
            if (is_file($tmpFile)) unlink($tmpFile);
        });

        $oldUmask = umask(0077);

        try {
            $tar = new Tar();
            $tar->setCompression(6, Archive::COMPRESS_GZIP);
            $tar->create($tmpFile);

            // Belt-and-suspenders: explicitly chmod once created, in case the
            // umask wasn't honored (some filesystems / wrappers ignore it).
            chmod($tmpFile, 0600);

            foreach ($this->fileList as [$abs, $rel]) {
                try {
                    $tar->addFile($abs, $archiveDir . '/' . $rel);
                } catch (Exception $e) {
                    // Skip individual broken files rather than failing the whole backup.
                    continue;
                }
            }
            $tar->close();
        } catch (Exception $e) {
            // Covers ArchiveIOException as well as any other exception the archive
            // library raises while setting up, writing or closing the archive.
            if (is_file($tmpFile)) unlink($tmpFile);
            msg(sprintf($this->getLang('err_create'), hsc($e->getMessage())), -1);
            return;
        } finally {
            // Always restore the process umask, whatever happened above.
            umask($oldUmask);
        }

        // Drop any cached stat data for the file before reading its final size.
        clearstatcache(true, $tmpFile);
        $size = is_file($tmpFile) ? filesize($tmpFile) : false;

        // Open the archive BEFORE any header is sent: if it is missing, empty or
        // unreadable we can still report the error on a regular page.
        $fp = ($size !== false && $size > 0) ? fopen($tmpFile, 'rb') : false;
        if (!$fp) {
            if (is_file($tmpFile)) unlink($tmpFile);
            msg($this->getLang('err_archive'), -1);
            return;
        }

        // Clear any output buffering DokuWiki / extensions may have started so
        // headers + binary body go out cleanly.
        while (ob_get_level() > 0) {
            ob_end_clean();
        }

        header('Content-Type: application/gzip');
        header('Content-Disposition: attachment; filename="' . $downloadName . '"');
        header('Content-Length: ' . $size);
        header('Cache-Control: no-store, no-cache, must-revalidate, private');
        header('Pragma: no-cache');
        header('X-Content-Type-Options: nosniff');

        while (!feof($fp)) {
            $chunk = fread($fp, 1024 * 256);
            if ($chunk === false) break;
            echo $chunk;
            flush();
            // ignore_user_abort(true) keeps the script alive after a disconnect so
            // cleanup can run; stop reading once nobody is receiving the data.
            if (connection_aborted()) break;
        }
        fclose($fp);
        unlink($tmpFile);
        exit;
    }

    /**
     * Remove leftover temp archives from prior runs that died before unlink.
     * Anything matching our prefix older than TMP_STALE_AGE is fair game.
     */
    protected function sweepStaleTempFiles(): void
    {
        global $conf;
        $tmpDir = $conf['tmpdir'] ?? null;
        if (!$tmpDir || !is_dir($tmpDir)) return;

        $cutoff = time() - self::TMP_STALE_AGE;
        // glob() returns false on error; treat that as "nothing to sweep".
        $candidates = glob($tmpDir . '/' . self::TMP_PREFIX . '*') ?: [];
        foreach ($candidates as $stale) {
            if (!is_file($stale)) continue;
            $mtime = filemtime($stale);
            if ($mtime !== false && $mtime < $cutoff) {
                unlink($stale);
            }
        }
    }
}