<?php
if (!defined('DOKU_INC')) die();

/**
 * Hide IP — admin component.
 *
 * Admin-only page that walks the historical IP-bearing files DokuWiki has
 * accumulated and rewrites every IP field with the placeholder used by the
 * action component. Scope is intentionally narrow:
 *
 *   - $conf['metadir']/**.changes       page changelogs (per-page + master)
 *   - $conf['mediametadir']/**.changes  media changelogs (per-media + master)
 *   - $conf['metadir']/**.meta          page metadata (last_change.ip)
 *
 * NOT touched (per the project's explicit scope):
 *   - data/attic/, data/media_attic/    historical .gz revision archives
 *   - data/cache/, data/tmp/, data/log/ ephemeral / regenerated
 *
 * Authorship (user field) and timestamps (date field) are preserved; only
 * the IP field is rewritten. File mtimes are preserved across the rewrite.
 *
 * Atomicity: every write goes to a sibling tmp file with a random suffix and
 * is then rename()d into place. rename() is atomic on a single filesystem,
 * so a concurrent reader either sees the old file or the new file.
 *
 * Concurrency: processChangelog() and processMetaFile() hold io_lock() across
 * the full read-modify-write cycle when mutating, so concurrent DokuWiki
 * changelog appends (which also use io_lock) are properly serialized.
 *
 * Idempotent: running scrub twice is a no-op on lines that already hold the
 * placeholder.
 */

use dokuwiki\Extension\AdminPlugin;
use dokuwiki\Form\Form;

class admin_plugin_hideip extends AdminPlugin
{
    /** Mirror of action_plugin_hideip::PLACEHOLDER_IP. Kept inline so this
     *  admin component can run without the action component being loaded. */
    public const PLACEHOLDER_IP = '0.0.0.0';

    /** DokuWiki's hardcoded "external edit" marker. Not a real visitor IP and
     *  not something this plugin can intercept in real time — see isExemptIp(). */
    public const LOOPBACK_IP = '127.0.0.1';

    /** Random suffix length for tmp files; .hideip_tmp_<8 hex>. */
    public const TMP_SUFFIX_BYTES = 4;

    /**
     * Restrict this admin page to administrators.
     *
     * @return bool
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Sort position of this plugin in the admin menu.
     *
     * @return int
     */
    public function getMenuSort()
    {
        return 1000;
    }

    /**
     * Menu entry label, taken from the plugin's 'menu' language string.
     *
     * @param string $language
     * @return string
     */
    public function getMenuText($language)
    {
        return $this->getLang('menu');
    }

    /* ----------------------------------------------------------------- *
     * Dispatch
     * ----------------------------------------------------------------- */

    /** @var array|null per-section preview results: [section => [files, ipLines]] */
    protected $preview = null;

    /** @var array|null per-section scrub results: [section => [files, ipLines, errors]] */
    protected $scrub = null;

    /**
     * Process form submissions (preview and scrub actions).
     *
     * Requires a valid security token for both actions. The scrub action is
     * additionally restricted to POST requests and to admin users.
     *
     * @return void
     */
    public function handle()
    {
        global $INPUT;

        if (!$INPUT->has('hideip_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('hideip_action');
        if ($action !== 'preview' && $action !== 'scrub') return;

        if ($action === 'scrub' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
            msg($this->getLang('err_post_only'), -1);
            return;
        }

        if ($action === 'preview') {
            $this->preview = $this->runScan(false);
        } else {
            // Defense-in-depth admin re-check (framework already gates via
            // forAdminOnly + isAccessibleByCurrentUser, but the scrub mutates
            // production data; one more check is cheap).
            if (!auth_isadmin()) {
                msg($this->getLang('err_admin_required'), -1);
                return;
            }
            $this->scrub = $this->runScan(true);
        }
    }

    /**
     * Render the admin page: intro, warning box, action form, and the results
     * of whichever action handle() ran during this request (if any).
     *
     * @return void
     */
    public function html()
    {
        echo '<h1>' . hsc($this->getLang('menu')) . '</h1>';
        echo '<p>'
            . sprintf($this->getLang('intro_rewrite'), '<code>' . hsc(self::PLACEHOLDER_IP) . '</code>')
            . '<br>'
            . $this->getLang('intro_realtime')
            . '<br>'
            . $this->getLang('intro_preserved')
            . '</p>';

        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
            . '<strong>' . $this->getLang('warn_heading') . '</strong><br>'
            . $this->getLang('warn_data') . '<br>'
            . sprintf($this->getLang('warn_attic'), '<code>data/attic/</code>') . '<br>'
            . $this->getLang('warn_backup')
            . '</p>';

        $this->renderForm();

        if ($this->preview !== null) {
            $this->renderResults($this->getLang('heading_preview'), $this->preview, false);
        }
        if ($this->scrub !== null) {
            $this->renderResults($this->getLang('heading_scrub_done'), $this->scrub, true);
        }
    }

    /* ----------------------------------------------------------------- *
     * Form
     * ----------------------------------------------------------------- */

    /**
     * Render the preview/scrub action form.
     *
     * Both buttons share the name 'hideip_action'; the value of the pressed
     * button ('preview' or 'scrub') is what handle() dispatches on.
     *
     * @return void
     */
    protected function renderForm()
    {
        $form = new Form(['method' => 'POST', 'id' => 'hideip_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'hideip');

        $form->addTagOpen('p');
        $form->addButton('hideip_action', $this->getLang('btn_preview'))->val('preview');
        $form->addHTML(' ');
        $form->addButton('hideip_action', $this->getLang('btn_scrub'))->val('scrub');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /* ----------------------------------------------------------------- *
     * Scan/scrub orchestrator
     * ----------------------------------------------------------------- */

    /**
     * Walk all target files and either count IP-bearing entries or rewrite them.
     *
     * Lifts the execution time limit and keeps running after a client
     * disconnect, so a long scrub is not cut off halfway.
     *
     * @param bool $mutate false = preview only, true = rewrite on disk
     * @return array[] [section_label => [files, lines, errors]]
     */
    protected function runScan($mutate)
    {
        global $conf;

        if (function_exists('set_time_limit')) set_time_limit(0);
        if (function_exists('ignore_user_abort')) ignore_user_abort(true);

        $sections = [
            $this->getLang('section_page_changes') => [
                'root' => $conf['metadir'],
                'kind' => 'changes',
            ],
            $this->getLang('section_media_changes') => [
                'root' => $conf['mediametadir'],
                'kind' => 'changes',
            ],
            $this->getLang('section_page_meta') => [
                'root' => $conf['metadir'],
                'kind' => 'meta',
            ],
        ];

        $results = [];
        foreach ($sections as $label => $cfg) {
            $results[$label] = $this->walkSection($cfg['root'], $cfg['kind'], $mutate);
        }
        return $results;
    }

    /**
     * Walk one section root, dispatching each candidate file to the right scrubber.
     *
     * Failures never propagate to the caller; they are collected in the
     * returned 'errors' list instead:
     *   - a failure on a single file is recorded against that file's path and
     *     the walk continues with the next file;
     *   - a failure of the directory traversal itself (e.g. a sub-directory
     *     that cannot be opened) is recorded against $root and ends the walk
     *     of this section. The scrub is idempotent, so it can simply be
     *     re-run once the underlying problem is fixed.
     *
     * @param string $root
     * @param string $kind 'changes' or 'meta'
     * @param bool $mutate
     * @return array{files:int,lines:int,errors:array}
     */
    protected function walkSection($root, $kind, $mutate)
    {
        $stats = ['files' => 0, 'lines' => 0, 'errors' => []];

        if (!is_dir($root)) return $stats;

        try {
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $root,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::LEAVES_ONLY
            );
        } catch (Exception $e) {
            $stats['errors'][] = $root . ': ' . $e->getMessage();
            return $stats;
        }

        // The recursive iterator opens sub-directories lazily while the
        // foreach advances, so an unreadable or vanished directory throws
        // from the loop construct itself, outside the per-file try below.
        // Catch that here so it is reported instead of aborting the request.
        try {
            foreach ($it as $info) {
                $path = '?';
                try {
                    if (!$info->isFile() || !$info->isReadable()) continue;
                    $path = $info->getPathname();
                    $base = basename($path);

                    // Filter by extension matching the section we're walking.
                    if ($kind === 'changes' && !str_ends_with($base, '.changes')) continue;
                    if ($kind === 'meta' && !str_ends_with($base, '.meta')) continue;

                    $count = ($kind === 'changes')
                        ? $this->processChangelog($path, $mutate)
                        : $this->processMetaFile($path, $mutate);

                    if ($count > 0) {
                        $stats['files']++;
                        $stats['lines'] += $count;
                    }
                } catch (Exception $e) {
                    $stats['errors'][] = $path . ': ' . $e->getMessage();
                }
            }
        } catch (Exception $e) {
            $stats['errors'][] = $root . ': ' . $e->getMessage();
        }
        return $stats;
    }

    /**
     * Whether an IP value needs no action from the scrub.
     *
     * Three cases are exempt:
     *   - the placeholder itself ('0.0.0.0') — already anonymised (idempotent);
     *   - blank — already stripped by an older tool (e.g. the GDPR plugin);
     *   - loopback '127.0.0.1' — DokuWiki hardcodes this as its "external edit"
     *     marker (inc/ChangeLog/ChangeLog.php) whenever a page file's on-disk
     *     mtime no longer matches its changelog. It is re-synthesised on every
     *     view (page metadata) and on the next save (changelog) of such a page,
     *     so rewriting it is a treadmill. It is also a loopback address, not a
     *     real visitor IP, so it leaks nothing. We leave it untouched.
     *
     * Surrounding whitespace is ignored for the comparison.
     *
     * @param string $ip
     * @return bool
     */
    protected function isExemptIp($ip)
    {
        $ip = trim($ip);
        return $ip === ''
            || $ip === self::PLACEHOLDER_IP
            || $ip === self::LOOPBACK_IP;
    }

    /* ----------------------------------------------------------------- *
     * Changelog (.changes) scrubber — TSV format
     * ----------------------------------------------------------------- */

    /**
     * Process one .changes file.
     *
     * Line format (DokuWiki convention, tab-separated):
     *   timestamp \t ip \t type \t pageid \t user \t summary \t extra \t sizechange \n
     *
     * The IP field is field index 1. We rewrite it to PLACEHOLDER_IP unless
     * isExemptIp() says it needs no action: it already equals the placeholder
     * (idempotent), is empty (already scrubbed by an older tool like the GDPR
     * plugin which blanked it), or is the loopback marker.
     *
     * When mutating, io_lock() is held for the full read-modify-write cycle so
     * concurrent changelog appends (which also use io_lock) are serialized.
     *
     * @param string $path
     * @param bool $mutate false = count lines that would change, true = rewrite
     * @return int number of lines counted/changed
     * @throws RuntimeException if the file cannot be read or rewritten
     */
    protected function processChangelog($path, $mutate)
    {
        if ($mutate) io_lock($path);
        try {
            $content = file_get_contents($path);
            if ($content === false) {
                throw new RuntimeException('cannot read');
            }

            // Use \n split so we can rejoin without modification. Trailing newline
            // (if any) becomes an empty final element we filter when rebuilding.
            $lines = explode("\n", $content);
            $hadTrailingNewline = ($content !== '' && substr($content, -1) === "\n");
            if ($hadTrailingNewline) array_pop($lines); // drop the empty tail

            $changed = 0;
            foreach ($lines as $i => $line) {
                if ($line === '') continue; // skip blank lines in-place
                $fields = explode("\t", $line);
                if (count($fields) < 2) continue; // malformed; leave alone

                $ip = $fields[1];
                if ($this->isExemptIp($ip)) continue; // placeholder, blank, or loopback marker

                $fields[1] = self::PLACEHOLDER_IP;
                $lines[$i] = implode("\t", $fields);
                $changed++;
            }

            if ($changed === 0) return 0;
            if (!$mutate) return $changed;

            $newContent = implode("\n", $lines);
            if ($hadTrailingNewline) $newContent .= "\n";

            $this->atomicWrite($path, $newContent);
            return $changed;
        } finally {
            // Released on every exit path, including exceptions.
            if ($mutate) io_unlock($path);
        }
    }

    /* ----------------------------------------------------------------- *
     * Page metadata (.meta) scrubber — PHP serialize format
     * ----------------------------------------------------------------- */

    /**
     * Process one .meta file.
     *
     * .meta is a serialize()d ['current' => [...], 'persistent' => [...]]
     * structure (see inc/parserutils.php::p_save_metadata). The IP can live
     * under last_change.ip in either branch.
     *
     * Empty files and files that do not unserialize to an array are left
     * untouched and counted as 0.
     *
     * When mutating, io_lock() is held for the full read-modify-write cycle so
     * concurrent metadata saves (which also use io_lock) are serialized.
     *
     * @param string $path
     * @param bool $mutate
     * @return int number of ip slots changed (0..2 per file)
     * @throws RuntimeException if the file cannot be read or rewritten
     */
    protected function processMetaFile($path, $mutate)
    {
        if ($mutate) io_lock($path);
        try {
            $raw = file_get_contents($path);
            if ($raw === false) throw new RuntimeException('cannot read');
            if ($raw === '') return 0;

            // Never instantiate classes from on-disk data.
            $meta = unserialize($raw, ['allowed_classes' => false]);
            if (!is_array($meta)) return 0; // corrupt or non-meta - leave alone

            $changed = 0;
            foreach (['current', 'persistent'] as $branch) {
                if (
                    isset($meta[$branch]['last_change']['ip'])
                    && !$this->isExemptIp($meta[$branch]['last_change']['ip'])
                ) {
                    $meta[$branch]['last_change']['ip'] = self::PLACEHOLDER_IP;
                    $changed++;
                }
            }

            if ($changed === 0) return 0;
            if (!$mutate) return $changed;

            $this->atomicWrite($path, serialize($meta));
            return $changed;
        } finally {
            // Released on every exit path, including exceptions.
            if ($mutate) io_unlock($path);
        }
    }

    /* ----------------------------------------------------------------- *
     * Safe write helper
     * ----------------------------------------------------------------- */

    /**
     * Write $content to $path atomically, preserving the original mtime.
     *
     * The content is written to a sibling tmp file, given the original file's
     * permission bits, and then rename()d over $path. The tmp file is removed
     * again if the write or the rename fails.
     *
     * The caller must already hold io_lock($path) when mutating to prevent
     * concurrent writes from being lost by the rename.
     *
     * @param string $path
     * @param string $content
     * @throws RuntimeException on any unrecoverable failure
     */
    protected function atomicWrite($path, $content)
    {
        // Captured before the rename; restored via touch() at the end.
        $origMtime = filemtime($path);
        $tmp = $path . '.hideip_tmp_' . bin2hex(random_bytes(self::TMP_SUFFIX_BYTES));

        $ok = file_put_contents($tmp, $content, LOCK_EX);
        if ($ok === false) {
            if (is_file($tmp)) unlink($tmp);
            throw new RuntimeException('failed to write temp file');
        }

        // Copy permissions from the original so the rename doesn't change them.
        $origPerms = fileperms($path);
        if ($origPerms !== false) chmod($tmp, $origPerms & 0777);

        if (!rename($tmp, $path)) {
            if (is_file($tmp)) unlink($tmp);
            throw new RuntimeException('atomic rename failed');
        }

        if ($origMtime !== false) touch($path, $origMtime);
    }

    /* ----------------------------------------------------------------- *
     * Presentation
     * ----------------------------------------------------------------- */

    /**
     * Render the results table for a preview or scrub run.
     *
     * Prints a summary line with the totals, one table row per section, and,
     * if any section reported errors, a list of all error messages.
     *
     * @param string $heading pre-translated heading string
     * @param array[] $results [section_label => [files, lines, errors]]
     * @param bool $wasScrub true = results of a scrub, false = of a preview
     * @return void
     */
    protected function renderResults($heading, array $results, $wasScrub)
    {
        echo '<h2>' . hsc($heading) . '</h2>';

        $totalFiles = 0;
        $totalLines = 0;
        $totalErrors = 0;
        foreach ($results as $stats) {
            $totalFiles += $stats['files'];
            $totalLines += $stats['lines'];
            $totalErrors += count($stats['errors']);
        }

        if ($wasScrub) {
            echo '<p>' . sprintf($this->getLang('done_summary'), $totalLines, $totalFiles) . '</p>';
        } else {
            echo '<p>' . sprintf($this->getLang('preview_summary'), $totalLines, $totalFiles) . '</p>';
        }

        $colSlots = $wasScrub
            ? $this->getLang('col_slots_rewritten')
            : $this->getLang('col_slots_pending');

        echo '<table class="inline"><thead><tr>'
            . '<th>' . hsc($this->getLang('col_section')) . '</th>'
            . '<th>' . hsc($this->getLang('col_files')) . '</th>'
            . '<th>' . hsc($colSlots) . '</th>'
            . '<th>' . hsc($this->getLang('col_errors')) . '</th>'
            . '</tr></thead><tbody>';
        foreach ($results as $label => $stats) {
            echo '<tr>'
                . '<td>' . hsc($label) . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['files'] . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['lines'] . '</td>'
                . '<td style="text-align:right;">' . count($stats['errors']) . '</td>'
                . '</tr>';
        }
        echo '</tbody></table>';

        if ($totalErrors > 0) {
            echo '<h3>' . hsc($this->getLang('errors_heading')) . '</h3><ul>';
            foreach ($results as $stats) {
                foreach ($stats['errors'] as $err) {
                    echo '<li><code>' . hsc($err) . '</code></li>';
                }
            }
            echo '</ul>';
        }
    }
}