<?php
/**
 * Site Backup admin plugin for DokuWiki.
 *
 * Streams a tar.gz of selected wiki parts (pages, media, conf, lib/plugins, lib/tpl)
 * to the admin's browser. The archive is built in data/tmp/ with a random filename,
 * streamed out, and deleted immediately. Nothing persists on the server.
 *
 * Security model:
 *   - Admin-only: DokuWiki's AdminPlugin framework enforces auth_isadmin() before
 *     handle()/html() are invoked because forAdminOnly() returns true. A second
 *     explicit check inside streamArchive() guards against any framework bypass.
 *   - The temp archive lives in $conf['tmpdir'] (data/tmp/), which DokuWiki ships
 *     with a deny-all .htaccess; it cannot be fetched directly even if the path
 *     were known.
 *   - Filename uses 64 bits of CSPRNG randomness, file is chmod'd to 0600, and is
 *     deleted both at the natural end of streamArchive() and via a shutdown
 *     function in case the connection is aborted partway.
 *   - Stale temp files from previous runs (older than 1 hour) are swept on each
 *     invocation, so even a crash-during-stream leaves nothing for long.
 *
 * Treat downloaded archives as credentials: they may include conf/users.auth.php
 * (password hashes), ACL rules, and any secrets stored in conf/local.php.
 */

use dokuwiki\Extension\AdminPlugin;
use dokuwiki\Form\Form;
use splitbrain\PHPArchive\Tar;
use splitbrain\PHPArchive\Archive;
use splitbrain\PHPArchive\ArchiveIOException;

class admin_plugin_sitebackup extends AdminPlugin
{
    /** Prefix used for the temp archive filename in data/tmp/. */
    const TMP_PREFIX = 'sitebackup_tmp_';

    /** Max age (seconds) of leftover temp files before sweep removes them. */
    const TMP_STALE_AGE = 3600;

    /**
     * @var array list of [absolute path, archive-relative path, size, section] of files to include.
     *            "section" is the archive root the file was collected under (e.g. "conf",
     *            "data/pages"); it may be null for entries appended without one.
     */
    protected $fileList = [];

    /** @var int total uncompressed size of selected files */
    protected $totalBytes = 0;

    /**
     * Restrict this admin page to admins.
     *
     * @return bool always true
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Sort key for the admin menu entry.
     *
     * @return int
     */
    public function getMenuSort()
    {
        return 1000;
    }

    /**
     * Label for the admin menu entry (not localized).
     *
     * @param string $language unused
     * @return string
     */
    public function getMenuText($language)
    {
        return 'Site Backup';
    }

    /**
     * Dispatch based on the submitted action.
     * Valid actions: "preview" (build file list, render summary table),
     *                "download" (build archive, stream as tar.gz).
     *
     * Does nothing unless an action was submitted and checkSecurityToken() passes.
     */
    public function handle()
    {
        global $INPUT;

        // Sweep stale temp files from previous runs every time we enter the page.
        $this->sweepStaleTempFiles();

        if (!$INPUT->has('sitebackup_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('sitebackup_action');
        if ($action !== 'preview' && $action !== 'download') return;

        // Download MUST be POST. Refuse GET / HEAD / etc. so a stray link, browser
        // prefetch, or curious co-admin pasting a URL can't trigger a backup.
        if ($action === 'download' && ($_SERVER['REQUEST_METHOD'] ?? 'GET') !== 'POST') {
            msg('Site Backup: download must be submitted via POST.', -1);
            return;
        }

        $this->collectFiles();

        if ($action === 'download') {
            $this->streamArchive();
            // streamArchive() exits on success. If it returns, an error was shown
            // via msg() and we fall through to html() so the user sees the form.
        }
    }

    /**
     * Render the page: intro, sensitivity warning, the selection form and, when a
     * file list has been collected by handle(), the preview table.
     */
    public function html()
    {
        echo '<h1>Site Backup</h1>';
        echo '<p>Select what to include, click <em>Preview</em> to see the file list and total size, '
            . 'then <em>Download tar.gz</em> to receive the archive in your browser.</p>';
        echo '<p style="background:#fff3cd;border:1px solid #ffeeba;padding:8px;border-radius:4px;">'
            . '<strong>Sensitive content warning.</strong> The archive may contain password hashes '
            . '(<code>conf/users.auth.php</code>), ACL rules, and any secrets stored in '
            . '<code>conf/local.php</code> (DB credentials, SMTP passwords, API keys). '
            . 'Treat the download like a credential.'
            . '</p>';

        $this->renderForm();

        if ($this->fileList) {
            $this->renderPreview();
        }
    }

    /* ----------------------------------------------------------------- *
     * Form
     * ----------------------------------------------------------------- */

    /**
     * Print the selection form.
     *
     * On first visit the built-in defaults are used; after a submission the
     * checkboxes reflect what was submitted (an unchecked box stays unchecked).
     */
    protected function renderForm()
    {
        global $INPUT;

        $hasSubmitted = $INPUT->has('sitebackup_action');
        $defaults = [
            'pages'       => true,
            'media'       => true,
            'meta'        => true,
            'media_meta'  => true,
            'attic'       => false,
            'media_attic' => false,
            'index'       => false,
            'conf'        => true,
            'plugins'     => true,
            'tpl'         => true,
        ];
        $sel = [];
        foreach ($defaults as $k => $def) {
            $sel[$k] = $hasSubmitted ? $INPUT->bool('sb_' . $k, false) : $def;
        }

        $form = new Form(['method' => 'POST', 'id' => 'sitebackup_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'sitebackup');

        $form->addFieldsetOpen('Wiki content');
        $this->addCheckboxRow($form, 'sb_pages', 'Pages (data/pages)', $sel['pages']);
        $this->addCheckboxRow($form, 'sb_media', 'Media files (data/media)', $sel['media']);
        $this->addCheckboxRow($form, 'sb_meta', 'Page metadata (data/meta)', $sel['meta']);
        $this->addCheckboxRow($form, 'sb_media_meta', 'Media metadata (data/media_meta)', $sel['media_meta']);
        $this->addCheckboxRow($form, 'sb_attic', 'Page revisions (data/attic) - can be large', $sel['attic']);
        $this->addCheckboxRow($form, 'sb_media_attic', 'Media revisions (data/media_attic)', $sel['media_attic']);
        $this->addCheckboxRow($form, 'sb_index', 'Search index (data/index) - rebuildable', $sel['index']);
        $form->addFieldsetClose();

        $form->addFieldsetOpen('Configuration & code');
        $this->addCheckboxRow($form, 'sb_conf', 'Configuration (conf/) - includes secrets', $sel['conf']);
        $this->addCheckboxRow($form, 'sb_plugins', 'Plugins source (lib/plugins/)', $sel['plugins']);
        $this->addCheckboxRow($form, 'sb_tpl', 'Templates source (lib/tpl/)', $sel['tpl']);
        $form->addFieldsetClose();

        $form->addTagOpen('p');
        $form->addButton('sitebackup_action', 'Preview')->val('preview');
        $form->addHTML(' ');
        $form->addButton('sitebackup_action', 'Download tar.gz')->val('download');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /**
     * Add one labelled checkbox, wrapped in its own <div>, to the form.
     *
     * @param Form   $form
     * @param string $name    field name (e.g. "sb_pages")
     * @param string $label   visible label
     * @param bool   $checked whether the box is pre-checked
     */
    protected function addCheckboxRow(Form $form, $name, $label, $checked)
    {
        $form->addTagOpen('div')->attr('style', 'margin:4px 0;');
        $cb = $form->addCheckbox($name, ' ' . $label);
        $cb->val('1');
        if ($checked) $cb->attr('checked', 'checked');
        $form->addTagClose('div');
    }

    /* ----------------------------------------------------------------- *
     * File collection
     * ----------------------------------------------------------------- */

    /**
     * Fill $fileList / $totalBytes from every root whose checkbox was submitted.
     */
    protected function collectFiles()
    {
        global $INPUT, $conf;

        // Use $conf[...] for the data dirs so relocated savedir installs still work.
        $roots = [
            'sb_pages'       => [$conf['datadir'],         'data/pages'],
            'sb_media'       => [$conf['mediadir'],        'data/media'],
            'sb_meta'        => [$conf['metadir'],         'data/meta'],
            'sb_media_meta'  => [$conf['mediametadir'],    'data/media_meta'],
            'sb_attic'       => [$conf['olddir'],          'data/attic'],
            'sb_media_attic' => [$conf['mediaolddir'],     'data/media_attic'],
            'sb_index'       => [$conf['indexdir'],        'data/index'],
            'sb_conf'        => [rtrim(DOKU_CONF, '/'),    'conf'],
            'sb_plugins'     => [rtrim(DOKU_PLUGIN, '/'),  'lib/plugins'],
            'sb_tpl'         => [DOKU_INC . 'lib/tpl',     'lib/tpl'],
        ];

        foreach ($roots as $field => $pair) {
            if (!$INPUT->bool($field, false)) continue;
            [$srcAbs, $archiveRel] = $pair;
            $this->walkInto($srcAbs, $archiveRel);
        }
    }

    /**
     * Recursively collect the readable regular files below $srcAbs.
     *
     * @param string $srcAbs     absolute source path (directory, or a single file)
     * @param string $archiveRel path the source is mapped to inside the archive
     */
    protected function walkInto($srcAbs, $archiveRel)
    {
        if (!file_exists($srcAbs)) return;

        if (is_file($srcAbs)) {
            $this->appendFile($srcAbs, $archiveRel, $archiveRel);
            return;
        }

        try {
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $srcAbs,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::LEAVES_ONLY,
                // Descending into an unreadable subdirectory throws while the
                // iterator advances, i.e. outside the per-file try below. This
                // flag makes the iterator skip such directories instead.
                RecursiveIteratorIterator::CATCH_GET_CHILD
            );
        } catch (Exception $e) {
            return;
        }

        $srcRoot = rtrim($srcAbs, '/');
        $rootLen = strlen($srcRoot) + 1; // +1 skips the separator after the root
        foreach ($it as $info) {
            try {
                if (!$info->isFile() || !$info->isReadable()) continue;
                $abs = $info->getPathname();
                $rel = str_replace('\\', '/', substr($abs, $rootLen));

                if ($this->isIgnored($archiveRel, $rel)) continue;

                $this->appendFile($abs, $archiveRel . '/' . $rel, $archiveRel);
            } catch (Exception $e) {
                // Best effort: one unreadable entry must not abort the whole walk.
                continue;
            }
        }
    }

    /**
     * Filename / path-segment ignores. Hardcoded (no config) to keep the plugin small.
     *
     * @param string $archiveRel e.g. "conf" or "lib/plugins" - the top-level branch
     * @param string $rel        path within that branch
     * @return bool true when the file must be left out of the backup
     */
    protected function isIgnored($archiveRel, $rel)
    {
        $base = basename($rel);

        // Universal noise.
        if ($base === '_dummy') return true;
        if ($base === '.DS_Store') return true;
        if ($base === 'Thumbs.db') return true;

        // Belt-and-suspenders: never include our own scratch files even if
        // someone pointed savedir at an unusual location.
        if (strpos($base, self::TMP_PREFIX) === 0) return true;

        // Skip VCS metadata anywhere in any branch. Local clones / checkouts
        // can be huge and aren't part of "live" state.
        $segments = explode('/', $rel);
        foreach ($segments as $seg) {
            if ($seg === '.git') return true;
            if ($seg === '.svn') return true;
            if ($seg === '.hg') return true;
        }

        // conf/ branch: drop *.dist / *.example / *.bak sample files. They're
        // shipped with DokuWiki and templates, not real configuration.
        if ($archiveRel === 'conf') {
            if (preg_match('/\.(dist|example|bak)$/i', $base)) return true;
        }

        return false;
    }

    /**
     * Record one file for inclusion and add its size to the running total.
     *
     * @param string      $abs        absolute path on disk
     * @param string      $archiveRel path of the file inside the archive
     * @param string|null $section    archive root the file belongs to (used to group
     *                                the preview); null lets the preview derive it
     *                                from $archiveRel
     */
    protected function appendFile($abs, $archiveRel, $section = null)
    {
        $size = @filesize($abs);
        if ($size === false) $size = 0;
        $this->fileList[] = [$abs, $archiveRel, $size, $section];
        $this->totalBytes += $size;
    }

    /* ----------------------------------------------------------------- *
     * Preview
     * ----------------------------------------------------------------- */

    /**
     * Print the totals and a per-section table (file count and size) for $fileList.
     */
    protected function renderPreview()
    {
        echo '<h2>Preview</h2>';
        echo '<p>' . count($this->fileList) . ' files, '
            . hsc($this->humanBytes($this->totalBytes)) . ' uncompressed.</p>';

        $perRoot = [];
        foreach ($this->fileList as $entry) {
            [, $rel, $size] = $entry;
            // Group by the root the file was collected under. Guessing from the
            // first two path segments would give every file directly under
            // conf/ its own row.
            $top = $entry[3] ?? $this->defaultSection($rel);
            if (!isset($perRoot[$top])) $perRoot[$top] = ['count' => 0, 'bytes' => 0];
            $perRoot[$top]['count']++;
            $perRoot[$top]['bytes'] += $size;
        }
        ksort($perRoot);

        echo '<table class="inline"><thead><tr><th>Section</th><th>Files</th><th>Size</th></tr></thead><tbody>';
        foreach ($perRoot as $section => $stats) {
            echo '<tr><td><code>' . hsc($section) . '</code></td>'
                . '<td style="text-align:right;">' . (int)$stats['count'] . '</td>'
                . '<td style="text-align:right;">' . hsc($this->humanBytes($stats['bytes'])) . '</td></tr>';
        }
        echo '</tbody></table>';
        echo '<p>Click <em>Download tar.gz</em> above to create and download the archive '
            . '(compressed size will typically be smaller).</p>';
    }

    /**
     * Fallback section name for entries recorded without one: the first two
     * path segments of the archive path, or the only segment if there is one.
     *
     * @param string $rel archive-relative path
     * @return string
     */
    protected function defaultSection($rel)
    {
        $parts = explode('/', $rel, 3);
        return isset($parts[1]) ? ($parts[0] . '/' . $parts[1]) : $parts[0];
    }

    /**
     * Format a byte count with binary units (B, KiB, MiB, GiB, TiB).
     *
     * @param int|float $bytes
     * @return string whole number for plain bytes, two decimals otherwise
     */
    protected function humanBytes($bytes)
    {
        $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
        $i = 0;
        $n = (float)$bytes;
        while ($n >= 1024 && $i < count($units) - 1) {
            $n /= 1024;
            $i++;
        }
        return sprintf($i === 0 ? '%d %s' : '%.2f %s', $n, $units[$i]);
    }

    /* ----------------------------------------------------------------- *
     * Archive creation + streaming
     * ----------------------------------------------------------------- */

    /**
     * Build the tar.gz from $fileList in the temp dir, stream it to the browser
     * and delete it.
     *
     * Exits the script after streaming. Returns (after reporting via msg()) when
     * the archive cannot be produced, so the caller can render the form again.
     */
    protected function streamArchive()
    {
        global $conf;

        // Defense-in-depth: AdminPlugin framework should have blocked non-admins
        // before we got here, but verify directly anyway.
        if (!auth_isadmin()) {
            msg('Site Backup: admin access required.', -1);
            return;
        }

        if (!$this->fileList) {
            msg('Site Backup: nothing selected.', -1);
            return;
        }

        @set_time_limit(0);
        @ignore_user_abort(true);
        // Only ever raise the limit: a fixed ini_set('memory_limit', '256M')
        // would lower it on hosts configured with more.
        $this->raiseMemoryLimit(256 * 1024 * 1024);

        $tmpDir = $conf['tmpdir'];
        if (!is_dir($tmpDir) || !is_writable($tmpDir)) {
            msg('Site Backup: temp directory is not writable: ' . hsc($tmpDir), -1);
            return;
        }

        // Build a hard-to-guess filename. 16 hex chars = 64 bits of entropy from
        // a CSPRNG. The file also lives under data/.htaccess deny-all so even a
        // guess wouldn't be enough.
        $host = $_SERVER['HTTP_HOST'] ?? 'wiki';
        $host = (string) preg_replace('/[^a-zA-Z0-9._-]+/', '_', $host);
        if ($host === '') $host = 'wiki'; // empty Host header: avoid a "-backup-..." name
        $stamp = date('Ymd-His');
        $archiveDir = $host . '-backup-' . $stamp;             // dir inside the tar
        $downloadName = $archiveDir . '.tar.gz';               // browser filename
        $tmpFile = $tmpDir . '/' . self::TMP_PREFIX . bin2hex(random_bytes(8)) . '.tar.gz';

        // Guarantee the temp file is deleted even on connection abort, fatal
        // error, or `exit` from within the streaming loop.
        register_shutdown_function(static function () use ($tmpFile) {
            if (is_file($tmpFile)) @unlink($tmpFile);
        });

        $oldUmask = @umask(0077);

        try {
            $tar = new Tar();
            $tar->setCompression(6, Archive::COMPRESS_GZIP);
            $tar->create($tmpFile);

            // Belt-and-suspenders: explicitly chmod once created, in case the
            // umask wasn't honored (some filesystems / wrappers ignore it).
            @chmod($tmpFile, 0600);

            foreach ($this->fileList as [$abs, $rel, $size]) {
                try {
                    $tar->addFile($abs, $archiveDir . '/' . $rel);
                } catch (Exception $e) {
                    // Skip individual broken files rather than failing the whole backup.
                    continue;
                }
            }
            $tar->close();
        } catch (Exception $e) {
            // Covers ArchiveIOException as well as any other failure while
            // setting up or closing the archive.
            @unlink($tmpFile);
            msg('Site Backup: could not create archive: ' . hsc($e->getMessage()), -1);
            return;
        } finally {
            // Restore the umask on every path out of the build, including the
            // early return above and exceptions that are not caught here.
            @umask($oldUmask);
        }

        clearstatcache(true, $tmpFile);
        $size = is_file($tmpFile) ? filesize($tmpFile) : false;
        if ($size === false || $size === 0) {
            @unlink($tmpFile);
            msg('Site Backup: archive was empty or could not be written.', -1);
            return;
        }

        // Open before any header goes out so a failure can still be reported on
        // the page instead of producing an empty "download".
        $fp = @fopen($tmpFile, 'rb');
        if (!$fp) {
            @unlink($tmpFile);
            msg('Site Backup: could not open archive for streaming.', -1);
            return;
        }

        // The body is already gzip data; transparent output compression would
        // also invalidate Content-Length.
        @ini_set('zlib.output_compression', 'Off');

        // Clear any output buffering DokuWiki / extensions may have started so
        // headers + binary body go out cleanly. Stop at the first buffer that
        // refuses to be discarded, otherwise this loop would never end.
        while (ob_get_level() > 0) {
            if (!@ob_end_clean()) break;
        }

        header('Content-Type: application/gzip');
        header('Content-Disposition: attachment; filename="' . $downloadName . '"');
        if (ob_get_level() === 0) {
            // Only promise a length when no output handler is left that could
            // change the number of bytes actually sent.
            header('Content-Length: ' . $size);
        }
        header('Cache-Control: no-store, no-cache, must-revalidate, private');
        header('Pragma: no-cache');
        header('X-Content-Type-Options: nosniff');

        while (!feof($fp)) {
            $chunk = fread($fp, 1024 * 256);
            if ($chunk === false) break;
            echo $chunk;
            @flush();
            // ignore_user_abort(true) keeps the script alive after a disconnect
            // so cleanup runs; there is no point reading the rest of the file.
            if (connection_aborted()) break;
        }
        fclose($fp);
        @unlink($tmpFile);
        exit;
    }

    /**
     * Raise PHP's memory_limit to at least $wantBytes. Never lowers it and leaves
     * an unlimited ("-1") or unreadable setting untouched.
     *
     * @param int $wantBytes minimum limit in bytes
     */
    protected function raiseMemoryLimit($wantBytes)
    {
        $raw = trim((string) ini_get('memory_limit'));
        if ($raw === '' || $raw === '-1') return;

        // Shorthand notation: an integer with an optional K / M / G suffix.
        $current = (int) $raw;
        switch (strtolower(substr($raw, -1))) {
            case 'g':
                $current *= 1024;
                // no break
            case 'm':
                $current *= 1024;
                // no break
            case 'k':
                $current *= 1024;
        }

        if ($current > 0 && $current < $wantBytes) {
            @ini_set('memory_limit', (string) $wantBytes);
        }
    }

    /**
     * Remove leftover temp archives from prior runs that died before unlink.
     * Anything matching our prefix older than TMP_STALE_AGE is fair game.
     */
    protected function sweepStaleTempFiles()
    {
        global $conf;
        $tmpDir = $conf['tmpdir'] ?? null;
        if (!$tmpDir || !is_dir($tmpDir)) return;

        $cutoff = time() - self::TMP_STALE_AGE;
        $pattern = $tmpDir . '/' . self::TMP_PREFIX . '*';
        // glob() may return false on error; the cast plus is_file() skips that.
        foreach ((array) @glob($pattern) as $stale) {
            if (!is_file($stale)) continue;
            $mtime = @filemtime($stale);
            if ($mtime !== false && $mtime < $cutoff) {
                @unlink($stale);
            }
        }
    }
}