<?php
/**
 * Site Backup admin plugin for DokuWiki.
 *
 * Streams a tar.gz of selected wiki parts (pages, media, conf, lib/plugins, lib/tpl)
 * to the admin's browser. The archive is built in data/tmp/ with a random filename,
 * streamed out, and deleted immediately. Nothing persists on the server.
 *
 * Security model:
 *  - Admin-only: DokuWiki's AdminPlugin framework enforces auth_isadmin() before
 *    handle()/html() are invoked because forAdminOnly() returns true. A second
 *    explicit check inside streamArchive() guards against any framework bypass.
 *  - The temp archive lives in $conf['tmpdir'] (data/tmp/), which DokuWiki ships
 *    with a deny-all .htaccess; it cannot be fetched directly even if the path
 *    were known.
 *  - Filename uses 64 bits of CSPRNG randomness, file is chmod'd to 0600, and is
 *    deleted both at the natural end of streamArchive() and via a shutdown
 *    function in case the connection is aborted partway.
 *  - Stale temp files from previous runs (older than 1 hour) are swept on each
 *    invocation, so even a crash-during-stream leaves nothing for long.
 *
 * Treat downloaded archives as credentials: they may include conf/users.auth.php
 * (password hashes), ACL rules, and any secrets stored in conf/local.php.
 */

use dokuwiki\Extension\AdminPlugin;
use dokuwiki\Form\Form;
use splitbrain\PHPArchive\Archive;
use splitbrain\PHPArchive\ArchiveIOException;

// PatchedTar fixes splitbrain/php-archive PR #38 (mtime bug) for the version
// of the library vendored with DokuWiki Librarian.
require_once __DIR__ . '/PatchedTar.php';
use dokuwiki\plugin\sitebackup\PatchedTar as Tar;

class admin_plugin_sitebackup extends AdminPlugin
{
    /** Prefix used for the temp archive filename in data/tmp/. */
    const TMP_PREFIX = 'sitebackup_tmp_';

    /** Max age (seconds) of leftover temp files before sweep removes them. */
    const TMP_STALE_AGE = 3600;

    /**
     * @var array list of [absolute path, archive-relative path, size, section]
     *            of files to include; "section" is the archive root the file
     *            was collected under (e.g. "conf" or "data/pages")
     */
    protected $fileList = [];

    /** @var int total uncompressed size of selected files */
    protected $totalBytes = 0;

    /**
     * Restrict the plugin to administrators.
     *
     * @return bool always true
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Sort key of this plugin in the admin menu.
     *
     * @return int
     */
    public function getMenuSort()
    {
        return 1000;
    }

    /**
     * Menu label. Hardcoded; the $language argument is not used.
     *
     * @param string $language
     * @return string
     */
    public function getMenuText($language)
    {
        return 'Site Backup';
    }

    /**
     * Dispatch based on the submitted action.
     * Valid actions: "preview" (build file list, render summary table),
     *                "download" (build archive, stream as tar.gz).
     *
     * Requests without an action, with a failing security token, or with an
     * unknown action are ignored.
     */
    public function handle()
    {
        global $INPUT;

        // Sweep stale temp files from previous runs every time we enter the page.
        $this->sweepStaleTempFiles();

        if (!$INPUT->has('sitebackup_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('sitebackup_action');
        if ($action !== 'preview' && $action !== 'download') return;

        // Download MUST be POST. Refuse GET / HEAD / etc. so a stray link, browser
        // prefetch, or curious co-admin pasting a URL can't trigger a backup.
        if ($action === 'download' && ($_SERVER['REQUEST_METHOD'] ?? 'GET') !== 'POST') {
            msg('Site Backup: download must be submitted via POST.', -1);
            return;
        }

        $this->collectFiles();

        if ($action === 'download') {
            $this->streamArchive();
            // streamArchive() exits on success. If it returns, an error was shown
            // via msg() and we fall through to html() so the user sees the form.
        }
    }

    /**
     * Render the page: intro text, sensitivity warning, the selection form and,
     * when handle() collected any files, the preview table.
     */
    public function html()
    {
        echo '<h1>Site Backup</h1>';
        echo '<p>Select what to include, click <em>Preview</em> to see the file list and total size, '
            . 'then <em>Download tar.gz</em> to receive the archive in your browser.</p>';
        echo '<p style="background:#fff3cd;border:1px solid #ffeeba;padding:8px;border-radius:4px;">'
            . '<strong>Sensitive content warning.</strong> The archive may contain password hashes '
            . '(<code>conf/users.auth.php</code>), ACL rules, and any secrets stored in '
            . '<code>conf/local.php</code> (DB credentials, SMTP passwords, API keys). '
            . 'Treat the download like a credential.'
            . '</p>';

        $this->renderForm();

        if ($this->fileList) {
            $this->renderPreview();
        }
    }

    /* ----------------------------------------------------------------- *
     *  Form
     * ----------------------------------------------------------------- */

    /**
     * Print the selection form.
     *
     * On first visit the checkboxes use the defaults below; once the form has
     * been submitted they mirror the submitted "sb_*" values instead.
     */
    protected function renderForm()
    {
        global $INPUT;

        $hasSubmitted = $INPUT->has('sitebackup_action');
        $defaults = [
            'pages'       => true,
            'media'       => true,
            'meta'        => true,
            'media_meta'  => true,
            'attic'       => false,
            'media_attic' => false,
            'index'       => false,
            'conf'        => true,
            'plugins'     => true,
            'tpl'         => true,
        ];
        $sel = [];
        foreach ($defaults as $k => $def) {
            $sel[$k] = $hasSubmitted ? $INPUT->bool('sb_' . $k, false) : $def;
        }

        $form = new Form(['method' => 'POST', 'id' => 'sitebackup_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'sitebackup');

        $form->addFieldsetOpen('Wiki content');
        $this->addCheckboxRow($form, 'sb_pages', 'Pages (data/pages)', $sel['pages']);
        $this->addCheckboxRow($form, 'sb_media', 'Media files (data/media)', $sel['media']);
        $this->addCheckboxRow($form, 'sb_meta', 'Page metadata (data/meta)', $sel['meta']);
        $this->addCheckboxRow($form, 'sb_media_meta', 'Media metadata (data/media_meta)', $sel['media_meta']);
        $this->addCheckboxRow($form, 'sb_attic', 'Page revisions (data/attic) - can be large', $sel['attic']);
        $this->addCheckboxRow($form, 'sb_media_attic', 'Media revisions (data/media_attic)', $sel['media_attic']);
        $this->addCheckboxRow($form, 'sb_index', 'Search index (data/index) - rebuildable', $sel['index']);
        $form->addFieldsetClose();

        $form->addFieldsetOpen('Configuration & code');
        $this->addCheckboxRow($form, 'sb_conf', 'Configuration (conf/) - includes secrets', $sel['conf']);
        $this->addCheckboxRow($form, 'sb_plugins', 'Plugins source (lib/plugins/)', $sel['plugins']);
        $this->addCheckboxRow($form, 'sb_tpl', 'Templates source (lib/tpl/)', $sel['tpl']);
        $form->addFieldsetClose();

        $form->addTagOpen('p');
        $form->addButton('sitebackup_action', 'Preview')->val('preview');
        $form->addHTML(' ');
        $form->addButton('sitebackup_action', 'Download tar.gz')->val('download');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /**
     * Add one checkbox, wrapped in its own <div>, to the form.
     *
     * @param Form   $form    form being built
     * @param string $name    field name (one of the "sb_*" keys)
     * @param string $label   visible label
     * @param bool   $checked whether the box is rendered checked
     */
    protected function addCheckboxRow(Form $form, $name, $label, $checked)
    {
        $form->addTagOpen('div')->attr('style', 'margin:4px 0;');
        $cb = $form->addCheckbox($name, ' ' . $label);
        $cb->val('1');
        if ($checked) $cb->attr('checked', 'checked');
        $form->addTagClose('div');
    }

    /* ----------------------------------------------------------------- *
     *  File collection
     * ----------------------------------------------------------------- */

    /**
     * Fill $fileList / $totalBytes from every root whose checkbox was submitted.
     */
    protected function collectFiles()
    {
        global $INPUT, $conf;

        // Use $conf[...] for the data dirs so relocated savedir installs still work.
        // Each entry maps a form field to [source path, path inside the archive].
        $roots = [
            'sb_pages'       => [$conf['datadir'],       'data/pages'],
            'sb_media'       => [$conf['mediadir'],      'data/media'],
            'sb_meta'        => [$conf['metadir'],       'data/meta'],
            'sb_media_meta'  => [$conf['mediametadir'],  'data/media_meta'],
            'sb_attic'       => [$conf['olddir'],        'data/attic'],
            'sb_media_attic' => [$conf['mediaolddir'],   'data/media_attic'],
            'sb_index'       => [$conf['indexdir'],      'data/index'],
            'sb_conf'        => [rtrim(DOKU_CONF, '/'),   'conf'],
            'sb_plugins'     => [rtrim(DOKU_PLUGIN, '/'), 'lib/plugins'],
            'sb_tpl'         => [DOKU_INC . 'lib/tpl',    'lib/tpl'],
        ];

        foreach ($roots as $field => $pair) {
            if (!$INPUT->bool($field, false)) continue;
            [$srcAbs, $archiveRel] = $pair;
            $this->walkInto($srcAbs, $archiveRel);
        }
    }

    /**
     * Recursively register every readable, non-ignored file below $srcAbs.
     *
     * A missing source is skipped silently; a source that is a single file is
     * registered directly under $archiveRel.
     *
     * @param string $srcAbs     source file or directory
     * @param string $archiveRel path this root gets inside the archive
     */
    protected function walkInto($srcAbs, $archiveRel)
    {
        if (!file_exists($srcAbs)) return;

        if (is_file($srcAbs)) {
            $this->appendFile($srcAbs, $archiveRel);
            return;
        }

        try {
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $srcAbs,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::LEAVES_ONLY,
                // Without this flag a subdirectory that cannot be opened makes
                // the iterator itself throw while advancing - outside the
                // per-file try/catch below - which would abort the whole
                // request. With it, such directories are simply skipped.
                RecursiveIteratorIterator::CATCH_GET_CHILD
            );
        } catch (Exception $e) {
            return;
        }

        $srcRoot = rtrim($srcAbs, '/');
        $rootLen = strlen($srcRoot) + 1; // +1 drops the separator after the root
        foreach ($it as $info) {
            try {
                if (!$info->isFile() || !$info->isReadable()) continue;
                $abs = $info->getPathname();
                $rel = str_replace('\\', '/', substr($abs, $rootLen));

                if ($this->isIgnored($archiveRel, $rel)) continue;

                $this->appendFile($abs, $archiveRel . '/' . $rel, $archiveRel);
            } catch (Exception $e) {
                continue;
            }
        }
    }

    /**
     * Filename / path-segment ignores. Hardcoded (no config) to keep the plugin small.
     *
     * @param string $archiveRel  e.g. "conf" or "lib/plugins" - the top-level branch
     * @param string $rel         path within that branch
     * @return bool true when the file must be left out of the archive
     */
    protected function isIgnored($archiveRel, $rel)
    {
        $base = basename($rel);

        // Universal noise.
        if ($base === '_dummy')    return true;
        if ($base === '.DS_Store') return true;
        if ($base === 'Thumbs.db') return true;

        // Belt-and-suspenders: never include our own scratch files even if
        // someone pointed savedir at an unusual location.
        if (strpos($base, self::TMP_PREFIX) === 0) return true;

        // Skip VCS metadata anywhere in any branch. Local clones / checkouts
        // can be huge and aren't part of "live" state.
        $segments = explode('/', $rel);
        foreach ($segments as $seg) {
            if ($seg === '.git') return true;
            if ($seg === '.svn') return true;
            if ($seg === '.hg')  return true;
        }

        // conf/ branch: drop *.dist / *.example / *.bak sample files. They're
        // shipped with DokuWiki and templates, not real configuration.
        if ($archiveRel === 'conf') {
            if (preg_match('/\.(dist|example|bak)$/i', $base)) return true;
        }

        return false;
    }

    /**
     * Register one file and add its size to the running total.
     *
     * @param string      $abs        absolute path of the file on disk
     * @param string      $archiveRel path of the file inside the archive
     * @param string|null $section    archive root the file belongs to, used to
     *                                group the preview; defaults to $archiveRel
     */
    protected function appendFile($abs, $archiveRel, $section = null)
    {
        $size = @filesize($abs);
        if ($size === false) $size = 0; // unknown size still gets archived
        $this->fileList[] = [$abs, $archiveRel, $size, $section ?? $archiveRel];
        $this->totalBytes += $size;
    }

    /* ----------------------------------------------------------------- *
     *  Preview
     * ----------------------------------------------------------------- */

    /**
     * Print the totals and a per-section table (file count and size).
     */
    protected function renderPreview()
    {
        echo '<h2>Preview</h2>';
        echo '<p>' . count($this->fileList) . ' files, '
            . hsc($this->humanBytes($this->totalBytes)) . ' uncompressed.</p>';

        // Group by the root each file was collected under. Deriving the group
        // from the first two path segments would turn every file directly in
        // the one-segment "conf" root into its own row.
        $perRoot = [];
        foreach ($this->fileList as $entry) {
            $top = $entry[3] ?? $entry[1];
            if (!isset($perRoot[$top])) $perRoot[$top] = ['count' => 0, 'bytes' => 0];
            $perRoot[$top]['count']++;
            $perRoot[$top]['bytes'] += $entry[2];
        }
        ksort($perRoot);

        echo '<table class="inline"><thead><tr><th>Section</th><th>Files</th><th>Size</th></tr></thead><tbody>';
        foreach ($perRoot as $section => $stats) {
            echo '<tr><td><code>' . hsc($section) . '</code></td>'
                . '<td style="text-align:right;">' . (int)$stats['count'] . '</td>'
                . '<td style="text-align:right;">' . hsc($this->humanBytes($stats['bytes'])) . '</td></tr>';
        }
        echo '</tbody></table>';
        echo '<p>Click <em>Download tar.gz</em> above to create and download the archive '
            . '(compressed size will typically be smaller).</p>';
    }

    /**
     * Format a byte count with binary units (B, KiB, MiB, GiB, TiB).
     *
     * @param int|float $bytes
     * @return string e.g. "512 B" or "1.50 MiB"
     */
    protected function humanBytes($bytes)
    {
        $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
        $i = 0;
        $n = (float)$bytes;
        while ($n >= 1024 && $i < count($units) - 1) {
            $n /= 1024;
            $i++;
        }
        // Plain bytes are printed as an integer, larger units with two decimals.
        return sprintf($i === 0 ? '%d %s' : '%.2f %s', $n, $units[$i]);
    }

    /* ----------------------------------------------------------------- *
     *  Archive creation + streaming
     * ----------------------------------------------------------------- */

    /**
     * Build the tar.gz in the temp directory, send it to the browser and exit.
     *
     * Returns (after reporting through msg()) only when something went wrong;
     * on success the script terminates once the archive has been sent.
     */
    protected function streamArchive()
    {
        global $conf;

        // Defense-in-depth: AdminPlugin framework should have blocked non-admins
        // before we got here, but verify directly anyway.
        if (!auth_isadmin()) {
            msg('Site Backup: admin access required.', -1);
            return;
        }

        if (!$this->fileList) {
            msg('Site Backup: nothing selected.', -1);
            return;
        }

        @set_time_limit(0);
        @ignore_user_abort(true);
        // Only ever raise the limit: a plain ini_set() would LOWER it on hosts
        // configured with more than 256M or with no limit at all.
        $this->raiseMemoryLimit('256M');

        $tmpDir = $conf['tmpdir'];
        if (!is_dir($tmpDir) || !is_writable($tmpDir)) {
            msg('Site Backup: temp directory is not writable: ' . hsc($tmpDir), -1);
            return;
        }

        // Build a hard-to-guess filename. 16 hex chars = 64 bits of entropy from
        // a CSPRNG. The file also lives under data/.htaccess deny-all so even a
        // guess wouldn't be enough.
        $host = $_SERVER['HTTP_HOST'] ?? 'wiki';
        $host = preg_replace('/[^a-zA-Z0-9._-]+/', '_', $host);
        $stamp = date('Ymd-His');
        $archiveDir = $host . '-backup-' . $stamp;       // dir inside the tar
        $downloadName = $archiveDir . '.tar.gz';         // browser filename
        $tmpFile = $tmpDir . '/' . self::TMP_PREFIX . bin2hex(random_bytes(8)) . '.tar.gz';

        // Guarantee the temp file is deleted even on connection abort, fatal
        // error, or `exit` from within the streaming loop.
        register_shutdown_function(function () use ($tmpFile) {
            if (is_file($tmpFile)) @unlink($tmpFile);
        });

        $oldUmask = @umask(0077);

        try {
            $tar = new Tar();
            $tar->setCompression(6, Archive::COMPRESS_GZIP);
            $tar->create($tmpFile);

            // Belt-and-suspenders: explicitly chmod once created, in case the
            // umask wasn't honored (some filesystems / wrappers ignore it).
            @chmod($tmpFile, 0600);

            foreach ($this->fileList as $entry) {
                try {
                    $tar->addFile($entry[0], $archiveDir . '/' . $entry[1]);
                } catch (Exception $e) {
                    // Skip individual broken files rather than failing the whole backup.
                    continue;
                }
            }
            $tar->close();
        } catch (Exception $e) {
            // Covers ArchiveIOException as well as any other exception type the
            // archive library raises, so none of them escapes uncaught.
            @unlink($tmpFile);
            msg('Site Backup: could not create archive: ' . hsc($e->getMessage()), -1);
            return;
        } finally {
            // Runs on every path out of the try, including the return above.
            @umask($oldUmask);
        }

        if (!is_file($tmpFile) || filesize($tmpFile) === 0) {
            @unlink($tmpFile);
            msg('Site Backup: archive was empty or could not be written.', -1);
            return;
        }

        $size = filesize($tmpFile);

        // Open before any header is sent, so a failure can still be reported on
        // the normal page instead of producing an empty "download".
        $fp = @fopen($tmpFile, 'rb');
        if (!$fp) {
            @unlink($tmpFile);
            msg('Site Backup: archive could not be opened for download.', -1);
            return;
        }

        // Clear any output buffering DokuWiki / extensions may have started so
        // headers + binary body go out cleanly. Stop as soon as a buffer refuses
        // to go away - looping on ob_get_level() alone would never terminate.
        while (ob_get_level() > 0) {
            if (!@ob_end_clean()) break;
        }
        $stillBuffered = ob_get_level() > 0;

        header('Content-Type: application/gzip');
        header('Content-Disposition: attachment; filename="' . $downloadName . '"');
        if (!$stillBuffered) {
            // A remaining output handler may change the body length, so only
            // announce the size when the bytes go out untouched.
            header('Content-Length: ' . $size);
        }
        header('Cache-Control: no-store, no-cache, must-revalidate, private');
        header('Pragma: no-cache');
        header('X-Content-Type-Options: nosniff');

        while (!feof($fp)) {
            $chunk = fread($fp, 1024 * 256);
            if ($chunk === false) break;
            echo $chunk;
            if ($stillBuffered) @ob_flush();
            @flush();
            // ignore_user_abort(true) keeps the script alive after a disconnect
            // so cleanup runs; no point reading the rest of the file though.
            if (connection_aborted()) break;
        }
        fclose($fp);
        @unlink($tmpFile);
        exit;
    }

    /**
     * Raise PHP's memory_limit to $wanted unless it is already at least that
     * high or unlimited. Never lowers the limit.
     *
     * @param string $wanted ini-style size such as "256M"
     */
    protected function raiseMemoryLimit($wanted)
    {
        $current = ini_get('memory_limit');
        if ($current === false) return; // cannot tell - leave it alone

        $currentBytes = $this->iniBytes($current);
        if ($currentBytes < 0) return;  // -1 means "no limit"

        if ($currentBytes < $this->iniBytes($wanted)) {
            @ini_set('memory_limit', $wanted);
        }
    }

    /**
     * Convert an ini size value using PHP's shorthand suffixes (K, M, G) to bytes.
     *
     * @param string $value e.g. "128M", "1G", "-1"
     * @return int number of bytes; negative values are passed through unchanged
     */
    protected function iniBytes($value)
    {
        $value = trim((string)$value);
        if ($value === '') return 0;

        $bytes = (int)$value; // the cast reads the leading number, ignoring the suffix
        if ($bytes < 0) return $bytes;

        $suffix = strtolower(substr($value, -1));
        if ($suffix === 'g') {
            $bytes *= 1024 * 1024 * 1024;
        } elseif ($suffix === 'm') {
            $bytes *= 1024 * 1024;
        } elseif ($suffix === 'k') {
            $bytes *= 1024;
        }
        return $bytes;
    }

    /**
     * Remove leftover temp archives from prior runs that died before unlink.
     * Anything matching our prefix older than TMP_STALE_AGE is fair game.
     */
    protected function sweepStaleTempFiles()
    {
        global $conf;
        $tmpDir = $conf['tmpdir'] ?? null;
        if (!$tmpDir || !is_dir($tmpDir)) return;

        $cutoff = time() - self::TMP_STALE_AGE;
        $pattern = $tmpDir . '/' . self::TMP_PREFIX . '*';
        // glob() may return false; the cast turns that into a one-element array
        // whose entry fails the is_file() check below.
        foreach ((array) @glob($pattern) as $stale) {
            if (!is_file($stale)) continue;
            $mtime = @filemtime($stale);
            if ($mtime !== false && $mtime < $cutoff) {
                @unlink($stale);
            }
        }
    }
}