<?php

if (!defined('DOKU_INC')) die();

/**
 * Hide IP — admin component.
 *
 * Admin-only page that walks the historical IP-bearing files DokuWiki has
 * accumulated and rewrites every IP field with the placeholder used by the
 * action component. Scope is intentionally narrow:
 *
 *   - $conf['metadir']/**.changes         page changelogs (per-page + master)
 *   - $conf['mediametadir']/**.changes    media changelogs (per-media + master)
 *   - $conf['metadir']/**.meta            page metadata (last_change.ip)
 *
 * NOT touched (per the project's explicit scope):
 *   - data/attic/, data/media_attic/      historical .gz revision archives
 *   - data/cache/, data/tmp/, data/log/   ephemeral / regenerated
 *
 * Authorship (user field) and timestamps (date field) are preserved; only
 * the IP field is rewritten. File mtimes are preserved across the rewrite
 * on a best-effort basis.
 *
 * Atomicity: every write goes to a sibling tmp file with a random suffix and
 * is then rename()d into place. rename() is atomic on a single filesystem,
 * so a concurrent reader either sees the old file or the new file.
 *
 * Concurrent writers: while rewriting, the read-modify-write cycle for each
 * file runs under DokuWiki's io_lock() for that path, so that a line appended
 * by a concurrent edit is not lost between our read and our rename.
 *
 * Idempotent: running scrub twice is a no-op on lines that already hold the
 * placeholder.
 */

use dokuwiki\Extension\AdminPlugin;
use dokuwiki\Form\Form;

class admin_plugin_hideip extends AdminPlugin
{
    /** Mirror of action_plugin_hideip::PLACEHOLDER_IP. Kept inline so this
     *  admin component can run without the action component being loaded. */
    public const PLACEHOLDER_IP = '0.0.0.0';

    /** Random suffix length in bytes for tmp files; 4 bytes -> .hideip_tmp_<8 hex>. */
    public const TMP_SUFFIX_BYTES = 4;

    /**
     * Restrict this admin page to full administrators.
     *
     * @return bool
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Sort position of this plugin in the admin menu.
     *
     * @return int
     */
    public function getMenuSort()
    {
        return 1000;
    }

    /**
     * Menu label, taken from the plugin's 'menu' language string.
     *
     * @param string $language
     * @return string
     */
    public function getMenuText($language)
    {
        return $this->getLang('menu');
    }

    /* ----------------------------------------------------------------- *
     * Dispatch
     * ----------------------------------------------------------------- */

    /** @var array|null per-section preview results: [section => [files, lines, errors]] */
    protected $preview = null;

    /** @var array|null per-section scrub results: [section => [files, lines, errors]] */
    protected $scrub = null;

    /**
     * Process form submissions (preview and scrub actions).
     *
     * Nothing happens unless a 'hideip_action' of 'preview' or 'scrub' is
     * submitted together with a valid security token. A scrub is additionally
     * required to arrive via POST and from an admin.
     *
     * @return void
     */
    public function handle()
    {
        global $INPUT;

        if (!$INPUT->has('hideip_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('hideip_action');

        if ($action === 'preview') {
            $this->preview = $this->runScan(false);
            return;
        }

        if ($action !== 'scrub') return;

        if ($INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
            msg('Hide IP: scrub must be submitted via POST.', -1);
            return;
        }

        // Defense-in-depth admin re-check (framework already gates via
        // forAdminOnly + isAccessibleByCurrentUser, but the scrub mutates
        // production data; one more check is cheap).
        if (!auth_isadmin()) {
            msg('Hide IP: admin access required.', -1);
            return;
        }

        $this->scrub = $this->runScan(true);
    }

    /**
     * Render the admin page: intro, warning box, the action form and, when
     * handle() produced them, the preview and/or scrub result tables.
     *
     * @return void
     */
    public function html()
    {
        echo '<h1>Hide IP</h1>';
        echo '<p>This page rewrites historical IP addresses on disk to '
            . '<code>' . hsc(self::PLACEHOLDER_IP) . '</code>.<br>New edits are already '
            . 'anonymised by the action component of this plugin (loads on every request).<br>'
            . 'Timestamps and authorship are preserved.</p>';

        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
            . '<strong>This action is destructive.</strong><br>Real IP addresses recorded in '
            . 'page and media changelogs and in page metadata will be replaced and cannot '
            . 'be recovered from these files.<br>The <code>data/attic/</code> revision archives are '
            . 'not modified — if your wiki retains those, IPs from saved revisions remain '
            . 'inside them.<br>Take a backup with the Site Backup plugin first if you want '
            . 'a recovery point.'
            . '</p>';

        $this->renderForm();

        if ($this->preview !== null) {
            $this->renderResults('Preview', $this->preview, false);
        }
        if ($this->scrub !== null) {
            $this->renderResults('Scrub complete', $this->scrub, true);
        }
    }

    /* ----------------------------------------------------------------- *
     * Form
     * ----------------------------------------------------------------- */

    /**
     * Render the preview/scrub action form.
     *
     * Both buttons share the name 'hideip_action'; the pressed button's value
     * ('preview' or 'scrub') is what handle() dispatches on.
     *
     * @return void
     */
    protected function renderForm()
    {
        $form = new Form(['method' => 'POST', 'id' => 'hideip_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'hideip');

        $form->addTagOpen('p');
        $form->addButton('hideip_action', 'Preview (count only)')->val('preview');
        $form->addHTML(' ');
        $form->addButton('hideip_action', 'Scrub now')->val('scrub');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /* ----------------------------------------------------------------- *
     * Scan/scrub orchestrator
     * ----------------------------------------------------------------- */

    /**
     * Walk all target files and either count IP-bearing entries or rewrite them.
     *
     * @param bool $mutate false = preview only, true = rewrite on disk
     * @return array[] [section_label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected function runScan($mutate)
    {
        global $conf;

        // A full walk can be slow on large wikis and must not stop half-way
        // just because the browser went away.
        if (function_exists('set_time_limit')) set_time_limit(0);
        if (function_exists('ignore_user_abort')) ignore_user_abort(true);

        $sections = [
            'Page changelogs (data/meta/*.changes)' => [
                'root' => $conf['metadir'],
                'kind' => 'changes',
            ],
            'Media changelogs (data/media_meta/*.changes)' => [
                'root' => $conf['mediametadir'],
                'kind' => 'changes',
            ],
            'Page metadata (data/meta/*.meta)' => [
                'root' => $conf['metadir'],
                'kind' => 'meta',
            ],
        ];

        $results = [];
        foreach ($sections as $label => $cfg) {
            $results[$label] = $this->walkSection($cfg['root'], $cfg['kind'], $mutate);
        }
        return $results;
    }

    /**
     * Walk one section root, dispatching each candidate file to the right scrubber.
     *
     * Problems never abort the walk of the remaining files where avoidable:
     * unreadable directories and unreadable target files are recorded in
     * 'errors' (so the admin knows IPs may remain there), and a failure while
     * processing one file is recorded and the walk continues.
     *
     * @param string $root   directory to walk recursively
     * @param string $kind   'changes' (TSV changelogs) or 'meta' (serialized metadata)
     * @param bool   $mutate false = count only, true = rewrite on disk
     * @return array{files:int,lines:int,errors:array}
     */
    protected function walkSection($root, $kind, $mutate)
    {
        $stats = ['files' => 0, 'lines' => 0, 'errors' => []];

        if (!is_dir($root)) return $stats;

        // File-name suffix that selects candidates for this section. An
        // unknown kind applies no filter, matching the previous behaviour.
        $suffixByKind = ['changes' => '.changes', 'meta' => '.meta'];
        $suffix = $suffixByKind[$kind] ?? null;

        try {
            // SELF_FIRST yields directories too, so unreadable ones can be
            // reported; CATCH_GET_CHILD keeps the iterator from throwing (and
            // thereby ending the whole walk) when it cannot descend into one.
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $root,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::SELF_FIRST,
                RecursiveIteratorIterator::CATCH_GET_CHILD
            );

            foreach ($it as $info) {
                // Resolved before the inner try so an error is always
                // attributed to the entry that actually caused it.
                $path = $info->getPathname();

                try {
                    if ($info->isDir()) {
                        if (!$info->isLink() && !$info->isReadable()) {
                            $stats['errors'][] = $path . ': directory not readable; skipped';
                        }
                        continue;
                    }
                    if (!$info->isFile()) continue;

                    // Filter by extension matching the section we're walking.
                    if ($suffix !== null && !str_ends_with(basename($path), $suffix)) continue;

                    if (!$info->isReadable()) {
                        $stats['errors'][] = $path . ': file not readable; skipped';
                        continue;
                    }

                    $count = ($kind === 'changes')
                        ? $this->processChangelog($path, $mutate)
                        : $this->processMetaFile($path, $mutate);

                    if ($count > 0) {
                        $stats['files']++;
                        $stats['lines'] += $count;
                    }
                } catch (Exception $e) {
                    $stats['errors'][] = $path . ': ' . $e->getMessage();
                }
            }
        } catch (Exception $e) {
            // Failure to open the root or to advance the iterator: keep what
            // was gathered so far and report the section as incomplete.
            $stats['errors'][] = $root . ': ' . $e->getMessage();
        }

        return $stats;
    }

    /* ----------------------------------------------------------------- *
     * Changelog (.changes) scrubber — TSV format
     * ----------------------------------------------------------------- */

    /**
     * Process one .changes file.
     *
     * Line format (DokuWiki convention, tab-separated):
     *   timestamp \t ip \t type \t pageid \t user \t summary \t extra \t sizechange \n
     *
     * The IP field is field index 1. We rewrite it to PLACEHOLDER_IP unless it
     * already equals the placeholder (idempotent) or is empty (already scrubbed
     * by an older tool like the GDPR plugin which blanked it).
     *
     * When mutating, the file is read, rewritten and renamed while holding the
     * per-file lock, so lines appended concurrently are not overwritten.
     *
     * @param string $path
     * @param bool   $mutate false = count lines that would change, true = rewrite
     * @return int number of lines counted/changed
     * @throws RuntimeException when the file cannot be read or written
     */
    protected function processChangelog($path, $mutate)
    {
        if ($mutate) $this->lockFile($path);
        try {
            $content = file_get_contents($path);
            if ($content === false) {
                throw new RuntimeException('cannot read');
            }

            [$newContent, $changed] = $this->scrubChangelogContent($content);

            if ($changed > 0 && $mutate) {
                $this->atomicWrite($path, $newContent);
            }
            return $changed;
        } finally {
            if ($mutate) $this->unlockFile($path);
        }
    }

    /**
     * Replace the IP field of every changelog line in $content.
     *
     * Pure string transformation: blank lines, lines without a second
     * tab-separated field, and lines whose IP is already the placeholder or
     * blank are left byte-identical. Splitting and re-joining on "\n" keeps a
     * trailing newline (or its absence) exactly as it was.
     *
     * @param string $content raw changelog text
     * @return array{0:string,1:int} [rewritten text, number of lines changed]
     */
    protected function scrubChangelogContent($content)
    {
        $lines = explode("\n", $content);
        $changed = 0;

        foreach ($lines as $i => $line) {
            if ($line === '') continue;              // blank (incl. the tail after a final "\n")

            $fields = explode("\t", $line);
            if (count($fields) < 2) continue;        // malformed; leave alone

            $ip = $fields[1];
            if ($ip === self::PLACEHOLDER_IP) continue;  // already scrubbed
            if (trim($ip) === '') continue;              // already blanked (GDPR-style)

            $fields[1] = self::PLACEHOLDER_IP;
            $lines[$i] = implode("\t", $fields);
            $changed++;
        }

        return [implode("\n", $lines), $changed];
    }

    /* ----------------------------------------------------------------- *
     * Page metadata (.meta) scrubber — PHP serialize format
     * ----------------------------------------------------------------- */

    /**
     * Process one .meta file.
     *
     * .meta is a serialize()d ['current' => [...], 'persistent' => [...]]
     * structure (see inc/parserutils.php::p_save_metadata). The IP can live
     * under last_change.ip in either branch.
     *
     * The decoded structure is not trusted: each level is checked to be an
     * array before it is indexed, so a file with an unexpected shape is
     * skipped instead of being indexed blindly.
     *
     * @param string $path
     * @param bool   $mutate false = count only, true = rewrite on disk
     * @return int number of ip slots changed (0..2 per file)
     * @throws RuntimeException when the file cannot be read or written
     */
    protected function processMetaFile($path, $mutate)
    {
        if ($mutate) $this->lockFile($path);
        try {
            $raw = file_get_contents($path);
            if ($raw === false) throw new RuntimeException('cannot read');
            if ($raw === '') return 0;

            // allowed_classes => false: never instantiate objects from disk.
            $meta = unserialize($raw, ['allowed_classes' => false]);
            if (!is_array($meta)) return 0;  // corrupt or non-meta - leave alone

            $changed = 0;
            foreach (['current', 'persistent'] as $branch) {
                if (!isset($meta[$branch]) || !is_array($meta[$branch])) continue;

                $lastChange = $meta[$branch]['last_change'] ?? null;
                if (!is_array($lastChange) || !isset($lastChange['ip'])) continue;
                if ($lastChange['ip'] === self::PLACEHOLDER_IP) continue;  // already scrubbed

                $meta[$branch]['last_change']['ip'] = self::PLACEHOLDER_IP;
                $changed++;
            }

            if ($changed > 0 && $mutate) {
                $this->atomicWrite($path, serialize($meta));
            }
            return $changed;
        } finally {
            if ($mutate) $this->unlockFile($path);
        }
    }

    /* ----------------------------------------------------------------- *
     * Safe write helpers
     * ----------------------------------------------------------------- */

    /**
     * Take DokuWiki's advisory I/O lock for $path, when available.
     *
     * NOTE(review): this is presumably the same lock DokuWiki core takes when
     * it saves changelog and metadata files; that only holds if core locks
     * the identical path string — confirm against inc/io.php.
     *
     * @param string $path
     * @return void
     */
    protected function lockFile($path)
    {
        if (function_exists('io_lock')) io_lock($path);
    }

    /**
     * Release the lock taken by lockFile().
     *
     * @param string $path
     * @return void
     */
    protected function unlockFile($path)
    {
        if (function_exists('io_unlock')) io_unlock($path);
    }

    /**
     * Write $content to $path atomically, preserving the original mtime.
     *
     * The content goes to a sibling temp file first and is rename()d over the
     * target. Permission bits and mtime of the original are copied on a
     * best-effort basis (a failing chmod()/touch() does not fail the write).
     *
     * @param string $path    existing file to replace
     * @param string $content new file content
     * @return void
     * @throws RuntimeException on any unrecoverable failure
     */
    protected function atomicWrite($path, $content)
    {
        // Make sure mtime/perms come from the file as it is now, not from a
        // stat result cached earlier in this request.
        clearstatcache(true, $path);
        $origMtime = filemtime($path);
        $origPerms = fileperms($path);

        $tmp = $path . '.hideip_tmp_' . bin2hex(random_bytes(self::TMP_SUFFIX_BYTES));

        if (file_put_contents($tmp, $content, LOCK_EX) === false) {
            @unlink($tmp);
            throw new RuntimeException('failed to write temp file');
        }

        // Copy permissions from the original so the rename doesn't change them.
        if ($origPerms !== false) chmod($tmp, $origPerms & 0777);

        if (!rename($tmp, $path)) {
            @unlink($tmp);
            throw new RuntimeException('atomic rename failed');
        }

        if ($origMtime !== false) touch($path, $origMtime);
    }

    /* ----------------------------------------------------------------- *
     * Presentation
     * ----------------------------------------------------------------- */

    /**
     * Render the results table for a preview or scrub run.
     *
     * Prints a one-line summary, a per-section table and, if any section
     * reported errors, the escaped list of error messages.
     *
     * @param string  $heading
     * @param array[] $results  [section_label => [files, lines, errors]]
     * @param bool    $wasScrub true = wording for a completed scrub, false = preview
     * @return void
     */
    protected function renderResults($heading, array $results, $wasScrub)
    {
        echo '<h2>' . hsc($heading) . '</h2>';

        $totalFiles = 0;
        $totalLines = 0;
        $totalErrors = 0;
        foreach ($results as $stats) {
            $totalFiles += $stats['files'];
            $totalLines += $stats['lines'];
            $totalErrors += count($stats['errors']);
        }

        if ($wasScrub) {
            echo '<p><strong>Done.</strong> Rewrote ' . (int)$totalLines
                . ' IP slot(s) across ' . (int)$totalFiles . ' file(s).</p>';
        } else {
            echo '<p>Would rewrite ' . (int)$totalLines . ' IP slot(s) across '
                . (int)$totalFiles . ' file(s).</p>';
        }

        echo '<table class="inline"><thead><tr>'
            . '<th>Section</th>'
            . '<th>Files affected</th>'
            . '<th>IP slots ' . ($wasScrub ? 'rewritten' : 'to rewrite') . '</th>'
            . '<th>Errors</th>'
            . '</tr></thead><tbody>';
        foreach ($results as $label => $stats) {
            echo '<tr>'
                . '<td>' . hsc($label) . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['files'] . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['lines'] . '</td>'
                . '<td style="text-align:right;">' . count($stats['errors']) . '</td>'
                . '</tr>';
        }
        echo '</tbody></table>';

        if ($totalErrors > 0) {
            echo '<h3>Errors</h3><ul>';
            foreach ($results as $stats) {
                foreach ($stats['errors'] as $err) {
                    echo '<li><code>' . hsc($err) . '</code></li>';
                }
            }
            echo '</ul>';
        }
    }
}