<?php
/**
 * Hide IP — admin component.
 *
 * Admin-only page that walks the historical IP-bearing files DokuWiki has
 * accumulated and rewrites every IP field with the placeholder used by the
 * action component. Scope is intentionally narrow:
 *
 *   - $conf['metadir']/**.changes        page changelogs (per-page + master)
 *   - $conf['mediametadir']/**.changes   media changelogs (per-media + master)
 *   - $conf['metadir']/**.meta           page metadata (last_change.ip)
 *
 * NOT touched (per the project's explicit scope):
 *   - data/attic/, data/media_attic/     historical .gz revision archives
 *   - data/cache/, data/tmp/, data/log/  ephemeral / regenerated
 *
 * Authorship (user field) and timestamps (date field) are preserved; only
 * the IP field is rewritten. File mtimes are preserved across the rewrite.
 *
 * Atomicity: every write goes to a sibling tmp file with a random suffix and
 * is then rename()d into place. rename() is atomic on a single filesystem,
 * so a concurrent reader either sees the old file or the new file.
 *
 * Idempotent: running scrub twice is a no-op on slots that already hold the
 * placeholder (or that are blank).
 */

use dokuwiki\Extension\AdminPlugin;
use dokuwiki\Form\Form;

class admin_plugin_hideip extends AdminPlugin
{
    /** Mirror of action_plugin_hideip::PLACEHOLDER_IP. Kept inline so this
     *  admin component can run without the action component being loaded. */
    const PLACEHOLDER_IP = '0.0.0.0';

    /** Number of random bytes in the tmp-file suffix (hex-encoded, so 4 bytes => 8 hex chars). */
    const TMP_SUFFIX_BYTES = 4;

    /** @var array|null per-section preview results: [section => [files, lines, errors]] */
    protected $preview = null;

    /** @var array|null per-section scrub results: [section => [files, lines, errors]] */
    protected $scrub = null;

    /**
     * Restrict this admin page to full admins.
     *
     * @return bool always true
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Position of the entry in the admin menu.
     *
     * @return int
     */
    public function getMenuSort()
    {
        return 1010; // sit just after sitebackup (1000)
    }

    /**
     * Label shown in the admin menu.
     *
     * @param string $language ignored; the label is not localised
     * @return string
     */
    public function getMenuText($language)
    {
        return 'Hide IP';
    }

    /* ----------------------------------------------------------------- *
     * Dispatch
     * ----------------------------------------------------------------- */

    /**
     * Handle a submitted form: run a preview or a scrub and stash the result
     * in $this->preview / $this->scrub for html() to render.
     *
     * Nothing happens unless `hideip_action` is present, the security token
     * validates, and the action is one of `preview` / `scrub`. A scrub is
     * additionally required to arrive via POST and from an admin.
     *
     * @return void
     */
    public function handle()
    {
        global $INPUT;

        if (!$INPUT->has('hideip_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('hideip_action');

        if ($action === 'preview') {
            $this->preview = $this->runScan(false);
            return;
        }
        if ($action !== 'scrub') return;

        if (($_SERVER['REQUEST_METHOD'] ?? 'GET') !== 'POST') {
            msg('Hide IP: scrub must be submitted via POST.', -1);
            return;
        }

        // Defense-in-depth admin re-check (the framework already gates via
        // forAdminOnly, but the scrub mutates production data; one more
        // check is cheap).
        if (!auth_isadmin()) {
            msg('Hide IP: admin access required.', -1);
            return;
        }

        $this->scrub = $this->runScan(true);
    }

    /**
     * Render the admin page: explanation, warning, form, and any results
     * produced by handle() during this request.
     *
     * @return void
     */
    public function html()
    {
        echo '<h1>Hide IP</h1>';
        echo '<p>This page rewrites historical IP addresses on disk to '
            . '<code>' . hsc(self::PLACEHOLDER_IP) . '</code>.<br>New edits are already '
            . 'anonymised by the action component of this plugin (loads on every request).<br>'
            . 'Timestamps and authorship are preserved.</p>';

        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
            . '<strong>This action is destructive.</strong><br>Real IP addresses recorded in '
            . 'page and media changelogs and in page metadata will be replaced and cannot '
            . 'be recovered from these files.<br>The <code>data/attic/</code> revision archives are '
            . 'not modified — if your wiki retains those, IPs from saved revisions remain '
            . 'inside them.<br>Take a backup with the Site Backup plugin first if you want '
            . 'a recovery point.'
            . '</p>';

        $this->renderForm();

        if ($this->preview !== null) {
            $this->renderResults('Preview', $this->preview, false);
        }
        if ($this->scrub !== null) {
            $this->renderResults('Scrub complete', $this->scrub, true);
        }
    }

    /* ----------------------------------------------------------------- *
     * Form
     * ----------------------------------------------------------------- */

    /**
     * Emit the POST form with the two submit buttons (preview / scrub).
     * Both buttons share the name `hideip_action`; the pressed one's value
     * selects the action in handle().
     *
     * @return void
     */
    protected function renderForm()
    {
        $form = new Form(['method' => 'POST', 'id' => 'hideip_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'hideip');

        $form->addTagOpen('p');
        $form->addButton('hideip_action', 'Preview (count only)')->val('preview');
        $form->addHTML(' ');
        $form->addButton('hideip_action', 'Scrub now')->val('scrub');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /* ----------------------------------------------------------------- *
     * Scan/scrub orchestrator
     * ----------------------------------------------------------------- */

    /**
     * Walk all target files and either count IP-bearing entries or rewrite them.
     *
     * @param bool $mutate false = preview only, true = rewrite on disk
     * @return array[] [section_label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected function runScan($mutate)
    {
        global $conf;

        // A full walk can be slow on a large wiki, and a half-finished scrub
        // because the browser went away is worse than a slow one.
        @set_time_limit(0);
        @ignore_user_abort(true);

        $sections = [
            'Page changelogs (data/meta/*.changes)' => [
                'root' => $conf['metadir'],
                'kind' => 'changes',
            ],
            'Media changelogs (data/media_meta/*.changes)' => [
                'root' => $conf['mediametadir'],
                'kind' => 'changes',
            ],
            'Page metadata (data/meta/*.meta)' => [
                'root' => $conf['metadir'],
                'kind' => 'meta',
            ],
        ];

        $results = [];
        foreach ($sections as $label => $cfg) {
            $results[$label] = $this->walkSection($cfg['root'], $cfg['kind'], $mutate);
        }
        return $results;
    }

    /**
     * Walk one section root, dispatching each candidate file to the right scrubber.
     *
     * Failures are collected in the returned 'errors' list instead of being
     * thrown, so one bad file or directory does not abort the section:
     *   - an unreadable sub-directory is reported and skipped;
     *   - an unreadable candidate file is reported and skipped;
     *   - an exception from a scrubber is reported against the file that
     *     was being processed.
     *
     * @param string $root   directory to walk recursively
     * @param string $kind   'changes' or 'meta'; selects suffix filter and scrubber
     * @param bool   $mutate false = count only, true = rewrite on disk
     * @return array{files:int,lines:int,errors:array}
     */
    protected function walkSection($root, $kind, $mutate)
    {
        $stats = ['files' => 0, 'lines' => 0, 'errors' => []];

        if (!is_dir($root)) return $stats;

        // Only the two known kinds filter by suffix; any other kind keeps the
        // historical behaviour of handing every file to the .meta scrubber.
        $suffixes = ['changes' => '.changes', 'meta' => '.meta'];
        $suffix = $suffixes[$kind] ?? null;

        try {
            // SELF_FIRST yields directories too, so unreadable ones can be
            // reported; CATCH_GET_CHILD keeps the walk going when descending
            // into such a directory fails instead of throwing out of foreach.
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $root,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::SELF_FIRST,
                RecursiveIteratorIterator::CATCH_GET_CHILD
            );

            foreach ($it as $info) {
                // Resolved before the try so the catch below always names the
                // file of *this* iteration, never a stale one.
                $path = $info->getPathname();

                try {
                    if ($info->isDir()) {
                        // Symlinked directories are never descended into
                        // (FOLLOW_SYMLINKS is not set), so only real ones matter.
                        if (!$info->isLink() && !$info->isReadable()) {
                            $stats['errors'][] = $path . ': directory not readable';
                        }
                        continue;
                    }
                    if (!$info->isFile()) continue;

                    if ($suffix !== null && substr(basename($path), -strlen($suffix)) !== $suffix) {
                        continue;
                    }

                    if (!$info->isReadable()) {
                        // A candidate we cannot inspect may still hold IPs; say so.
                        $stats['errors'][] = $path . ': not readable';
                        continue;
                    }

                    $count = ($kind === 'changes')
                        ? $this->processChangelog($path, $mutate)
                        : $this->processMetaFile($path, $mutate);

                    if ($count > 0) {
                        $stats['files']++;
                        $stats['lines'] += $count;
                    }
                } catch (Exception $e) {
                    $stats['errors'][] = $path . ': ' . $e->getMessage();
                }
            }
        } catch (Exception $e) {
            // Failure to open the root or to advance the iterator itself.
            $stats['errors'][] = $root . ': ' . $e->getMessage();
        }

        return $stats;
    }

    /**
     * Decide whether a stored IP value still needs to be replaced.
     *
     * The placeholder itself and blank/whitespace-only strings count as
     * already scrubbed. Any non-string value is treated as needing a rewrite
     * so the slot ends up holding the placeholder string.
     *
     * @param mixed $ip value found in the IP slot
     * @return bool true when the slot should be overwritten
     */
    protected function needsScrub($ip)
    {
        if (!is_string($ip)) return true;
        return $ip !== self::PLACEHOLDER_IP && trim($ip) !== '';
    }

    /* ----------------------------------------------------------------- *
     * Changelog (.changes) scrubber — TSV format
     * ----------------------------------------------------------------- */

    /**
     * Process one .changes file.
     *
     * Line format (DokuWiki convention, tab-separated):
     *   timestamp \t ip \t type \t pageid \t user \t summary \t extra \t sizechange \n
     *
     * The IP field is field index 1. It is rewritten to PLACEHOLDER_IP unless
     * it already equals the placeholder (idempotent) or is blank (already
     * scrubbed by an older tool that blanked it).
     *
     * @param string $path
     * @param bool   $mutate false = count lines that would change, true = rewrite
     * @return int number of lines counted/changed
     * @throws RuntimeException when the file cannot be read or rewritten
     */
    protected function processChangelog($path, $mutate)
    {
        $content = @file_get_contents($path);
        if ($content === false) {
            throw new RuntimeException('cannot read');
        }

        list($newContent, $changed) = $this->scrubChangelogContent($content);

        if ($changed > 0 && $mutate) {
            $this->atomicWrite($path, $newContent);
        }
        return $changed;
    }

    /**
     * Replace the IP field on every changelog line of $content.
     *
     * Pure string transformation with no I/O. Splitting and re-joining on "\n"
     * round-trips exactly, so blank lines, a trailing newline (or its
     * absence) and malformed lines are passed through untouched.
     *
     * @param string $content raw changelog text
     * @return array [string $newContent, int $changedLines]
     */
    protected function scrubChangelogContent($content)
    {
        $lines = explode("\n", $content);
        $changed = 0;

        foreach ($lines as $i => $line) {
            if ($line === '') continue; // blank line (or the tail after a final "\n")

            // Only the first two fields matter; keep the remainder in one piece.
            $fields = explode("\t", $line, 3);
            if (count($fields) < 2) continue; // malformed; leave alone

            if (!$this->needsScrub($fields[1])) continue;

            $fields[1] = self::PLACEHOLDER_IP;
            $lines[$i] = implode("\t", $fields);
            $changed++;
        }

        return [implode("\n", $lines), $changed];
    }

    /* ----------------------------------------------------------------- *
     * Page metadata (.meta) scrubber — PHP serialize format
     * ----------------------------------------------------------------- */

    /**
     * Process one .meta file.
     *
     * .meta is a serialize()d ['current' => [...], 'persistent' => [...]]
     * structure (see inc/parserutils.php::p_save_metadata). The IP can live
     * under last_change.ip in either branch. A blank IP is treated as already
     * scrubbed, matching processChangelog().
     *
     * @param string $path
     * @param bool   $mutate false = count only, true = rewrite on disk
     * @return int number of ip slots changed (0..2 per file)
     * @throws RuntimeException when the file cannot be read or rewritten
     */
    protected function processMetaFile($path, $mutate)
    {
        $raw = @file_get_contents($path);
        if ($raw === false) throw new RuntimeException('cannot read');
        if ($raw === '') return 0;

        // allowed_classes=false: never instantiate classes named in the data.
        // The @ silences the notice unserialize() raises on corrupt input.
        $meta = @unserialize($raw, ['allowed_classes' => false]);
        if (!is_array($meta)) return 0; // corrupt or non-meta - leave alone

        $changed = 0;
        foreach (['current', 'persistent'] as $branch) {
            if (!isset($meta[$branch]['last_change']['ip'])) continue;
            if (!$this->needsScrub($meta[$branch]['last_change']['ip'])) continue;

            $meta[$branch]['last_change']['ip'] = self::PLACEHOLDER_IP;
            $changed++;
        }

        if ($changed > 0 && $mutate) {
            $this->atomicWrite($path, serialize($meta));
        }
        return $changed;
    }

    /* ----------------------------------------------------------------- *
     * Safe write helper
     * ----------------------------------------------------------------- */

    /**
     * Write $content to $path atomically, preserving the original mtime and
     * permission bits.
     *
     * The content goes to a sibling temp file first and is then rename()d
     * over the target, so a reader never observes a partially written file.
     * Restoring permissions and mtime is best-effort; failures there are
     * ignored.
     *
     * NOTE(review): the caller's read and this rename are not coordinated with
     * other writers of the same file, so data appended in between would be
     * lost. Consider wrapping the read-modify-write in DokuWiki's
     * io_lock()/io_unlock() — confirm against inc/io.php.
     *
     * @param string $path    existing file to replace
     * @param string $content new file content
     * @return void
     * @throws RuntimeException on any unrecoverable failure
     */
    protected function atomicWrite($path, $content)
    {
        // Capture what we want to restore before anything touches the file.
        $origMtime = @filemtime($path);
        $origPerms = @fileperms($path);

        $tmp = $path . '.hideip_tmp_' . bin2hex(random_bytes(self::TMP_SUFFIX_BYTES));

        if (@file_put_contents($tmp, $content, LOCK_EX) === false) {
            @unlink($tmp);
            throw new RuntimeException('failed to write temp file');
        }

        // Copy permissions from the original so the rename doesn't change them.
        if ($origPerms !== false) {
            @chmod($tmp, $origPerms & 0777);
        }

        if (!@rename($tmp, $path)) {
            @unlink($tmp);
            throw new RuntimeException('atomic rename failed');
        }

        if ($origMtime !== false) {
            @touch($path, $origMtime);
        }
    }

    /* ----------------------------------------------------------------- *
     * Presentation
     * ----------------------------------------------------------------- */

    /**
     * Render a summary sentence, a per-section table and (if any) the list
     * of collected error messages.
     *
     * @param string $heading  section heading text
     * @param array  $results  output of runScan()
     * @param bool   $wasScrub true when the numbers describe performed
     *                         rewrites, false for a preview
     * @return void
     */
    protected function renderResults($heading, array $results, $wasScrub)
    {
        echo '<h2>' . hsc($heading) . '</h2>';

        $totalFiles = 0;
        $totalLines = 0;
        $allErrors = [];
        foreach ($results as $stats) {
            $totalFiles += $stats['files'];
            $totalLines += $stats['lines'];
            foreach ($stats['errors'] as $err) {
                $allErrors[] = $err;
            }
        }

        if ($wasScrub) {
            echo '<p><strong>Done.</strong> Rewrote ' . (int)$totalLines
                . ' IP slot(s) across ' . (int)$totalFiles . ' file(s).</p>';
        } else {
            echo '<p>Would rewrite ' . (int)$totalLines . ' IP slot(s) across '
                . (int)$totalFiles . ' file(s).</p>';
        }

        echo '<table class="inline"><thead><tr>'
            . '<th>Section</th>'
            . '<th>Files affected</th>'
            . '<th>IP slots ' . ($wasScrub ? 'rewritten' : 'to rewrite') . '</th>'
            . '<th>Errors</th>'
            . '</tr></thead><tbody>';
        foreach ($results as $label => $stats) {
            echo '<tr>'
                . '<td>' . hsc($label) . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['files'] . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['lines'] . '</td>'
                . '<td style="text-align:right;">' . count($stats['errors']) . '</td>'
                . '</tr>';
        }
        echo '</tbody></table>';

        if ($allErrors !== []) {
            echo '<h3>Errors</h3><ul>';
            foreach ($allErrors as $err) {
                echo '<li><code>' . hsc($err) . '</code></li>';
            }
            echo '</ul>';
        }
    }
}