<?php
if (!defined('DOKU_INC')) die();

/**
 * Hide IP — admin component.
 *
 * Admin-only page that walks the historical IP-bearing files DokuWiki has
 * accumulated and rewrites every IP field with the placeholder used by the
 * action component. Scope is intentionally narrow:
 *
 *   - $conf['metadir']/**.changes        page changelogs (per-page + master)
 *   - $conf['mediametadir']/**.changes   media changelogs (per-media + master)
 *   - $conf['metadir']/**.meta           page metadata (last_change.ip)
 *
 * NOT touched (per the project's explicit scope):
 *   - data/attic/, data/media_attic/     historical .gz revision archives
 *   - data/cache/, data/tmp/, data/log/  ephemeral / regenerated
 *
 * Authorship (user field) and timestamps (date field) are preserved; only
 * the IP field is rewritten. File mtimes are preserved across the rewrite.
 *
 * Atomicity: every write goes to a sibling tmp file with a random suffix and
 * is then rename()d into place. rename() is atomic on a single filesystem,
 * so a concurrent reader either sees the old file or the new file.
 *
 * Concurrency: processChangelog() and processMetaFile() hold io_lock() across
 * the full read-modify-write cycle when mutating, so concurrent DokuWiki
 * changelog appends (which also use io_lock) are properly serialized.
 *
 * Idempotent: running scrub twice is a no-op on lines that already hold the
 * placeholder.
 *
 * Error reporting: anything that prevents a target file or directory from
 * being examined (unreadable directory, unreadable file, failed write) is
 * collected per section and shown in the results table instead of aborting
 * the run.
 */

use dokuwiki\Extension\AdminPlugin;
use dokuwiki\Form\Form;

class admin_plugin_hideip extends AdminPlugin
{
    /** Mirror of action_plugin_hideip::PLACEHOLDER_IP. Kept inline so this
     *  admin component can run without the action component being loaded. */
    public const PLACEHOLDER_IP = '0.0.0.0';

    /** Random suffix length for tmp files; .hideip_tmp_<8 hex>. */
    public const TMP_SUFFIX_BYTES = 4;

    /**
     * Restrict this admin page to administrators.
     *
     * @return bool
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Sort position of this plugin in the admin menu.
     *
     * @return int
     */
    public function getMenuSort()
    {
        return 1000;
    }

    /**
     * Localized menu entry text.
     *
     * @param string $language
     * @return string
     */
    public function getMenuText($language)
    {
        return $this->getLang('menu');
    }

    /* ----------------------------------------------------------------- *
     * Dispatch
     * ----------------------------------------------------------------- */

    /**
     * Preview results, or null when no preview ran in this request.
     *
     * @var array|null [section_label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected $preview = null;

    /**
     * Scrub results, or null when no scrub ran in this request.
     *
     * @var array|null [section_label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected $scrub = null;

    /**
     * Process form submissions (preview and scrub actions).
     *
     * Nothing happens unless the request carries `hideip_action` and a valid
     * security token. A scrub is additionally required to arrive via POST and
     * to come from an admin; a preview only reads files.
     *
     * @return void
     */
    public function handle()
    {
        global $INPUT;

        if (!$INPUT->has('hideip_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('hideip_action');
        if ($action !== 'preview' && $action !== 'scrub') return;

        // The scrub mutates data on disk, so refuse it on anything but POST.
        if ($action === 'scrub' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
            msg($this->getLang('err_post_only'), -1);
            return;
        }

        if ($action === 'preview') {
            $this->preview = $this->runScan(false);
        } else {
            // Defense-in-depth admin re-check (framework already gates via
            // forAdminOnly + isAccessibleByCurrentUser, but the scrub mutates
            // production data; one more check is cheap).
            if (!auth_isadmin()) {
                msg($this->getLang('err_admin_required'), -1);
                return;
            }
            $this->scrub = $this->runScan(true);
        }
    }

    /**
     * Render the admin page: intro, warning box, action form, and the
     * results of whichever action handle() ran for this request.
     *
     * @return void
     */
    public function html()
    {
        echo '<h1>' . hsc($this->getLang('menu')) . '</h1>';
        echo '<p>'
            . sprintf($this->getLang('intro_rewrite'), '<code>' . hsc(self::PLACEHOLDER_IP) . '</code>')
            . '<br>'
            . $this->getLang('intro_realtime')
            . '<br>'
            . $this->getLang('intro_preserved')
            . '</p>';

        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
            . '<strong>' . $this->getLang('warn_heading') . '</strong><br>'
            . $this->getLang('warn_data') . '<br>'
            . sprintf($this->getLang('warn_attic'), '<code>data/attic/</code>') . '<br>'
            . $this->getLang('warn_backup')
            . '</p>';

        $this->renderForm();

        if ($this->preview !== null) {
            $this->renderResults($this->getLang('heading_preview'), $this->preview, false);
        }
        if ($this->scrub !== null) {
            $this->renderResults($this->getLang('heading_scrub_done'), $this->scrub, true);
        }
    }

    /* ----------------------------------------------------------------- *
     * Form
     * ----------------------------------------------------------------- */

    /**
     * Render the preview/scrub action form.
     *
     * Both buttons share the name `hideip_action`; the submitted value
     * ('preview' or 'scrub') tells handle() which action was requested.
     *
     * @return void
     */
    protected function renderForm()
    {
        $form = new Form(['method' => 'POST', 'id' => 'hideip_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'hideip');

        $form->addTagOpen('p');
        $form->addButton('hideip_action', $this->getLang('btn_preview'))->val('preview');
        $form->addHTML(' ');
        $form->addButton('hideip_action', $this->getLang('btn_scrub'))->val('scrub');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /* ----------------------------------------------------------------- *
     * Scan/scrub orchestrator
     * ----------------------------------------------------------------- */

    /**
     * Walk all target files and either count IP-bearing entries or rewrite them.
     *
     * @param bool $mutate false = preview only, true = rewrite on disk
     * @return array[] [section_label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected function runScan($mutate)
    {
        global $conf;

        // A full walk can outlast the default limits; keep going even if the
        // browser disconnects so a scrub is not abandoned half-way.
        if (function_exists('set_time_limit')) set_time_limit(0);
        if (function_exists('ignore_user_abort')) ignore_user_abort(true);

        $sections = [
            $this->getLang('section_page_changes') => [
                'root' => $conf['metadir'],
                'kind' => 'changes',
            ],
            $this->getLang('section_media_changes') => [
                'root' => $conf['mediametadir'],
                'kind' => 'changes',
            ],
            $this->getLang('section_page_meta') => [
                'root' => $conf['metadir'],
                'kind' => 'meta',
            ],
        ];

        $results = [];
        foreach ($sections as $label => $cfg) {
            $results[$label] = $this->walkSection($cfg['root'], $cfg['kind'], $mutate);
        }
        return $results;
    }

    /**
     * Walk one section root, dispatching each candidate file to the right scrubber.
     *
     * Failures never abort the walk: an unreadable directory, an unreadable
     * target file, or an exception from a scrubber is appended to the
     * returned 'errors' list and the walk moves on to the next entry.
     *
     * @param string $root
     * @param string $kind   'changes' or 'meta'
     * @param bool   $mutate
     * @return array{files:int,lines:int,errors:array}
     */
    protected function walkSection($root, $kind, $mutate)
    {
        $stats = ['files' => 0, 'lines' => 0, 'errors' => []];

        if (!is_dir($root)) return $stats;

        try {
            // SELF_FIRST yields directories too, so unreadable ones can be
            // reported below. CATCH_GET_CHILD stops the iterator from
            // throwing (and thereby ending the whole walk) when it fails to
            // descend into such a directory.
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $root,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::SELF_FIRST,
                RecursiveIteratorIterator::CATCH_GET_CHILD
            );
        } catch (Exception $e) {
            $stats['errors'][] = $root . ': ' . $e->getMessage();
            return $stats;
        }

        try {
            foreach ($it as $info) {
                $path = '?';
                try {
                    $path = $info->getPathname();

                    if ($info->isDir()) {
                        // Symlinked directories are not descended into by the
                        // iterator, so only real directories are reported.
                        if (!$info->isLink() && !$info->isReadable()) {
                            $stats['errors'][] = $path . ': directory not readable, skipped';
                        }
                        continue;
                    }
                    if (!$info->isFile()) continue;

                    $base = basename($path);

                    // Filter by extension matching the section we're walking.
                    if ($kind === 'changes' && !str_ends_with($base, '.changes')) continue;
                    if ($kind === 'meta' && !str_ends_with($base, '.meta')) continue;

                    // A target file we cannot read may still hold IPs; report
                    // it rather than letting the run look complete.
                    if (!$info->isReadable()) {
                        $stats['errors'][] = $path . ': cannot read';
                        continue;
                    }

                    $count = ($kind === 'changes')
                        ? $this->processChangelog($path, $mutate)
                        : $this->processMetaFile($path, $mutate);

                    if ($count > 0) {
                        $stats['files']++;
                        $stats['lines'] += $count;
                    }
                } catch (Exception $e) {
                    $stats['errors'][] = $path . ': ' . $e->getMessage();
                }
            }
        } catch (Exception $e) {
            // Safety net for failures raised by the iterator itself while
            // advancing; keep the stats gathered so far and report the cause.
            $stats['errors'][] = $root . ': ' . $e->getMessage();
        }
        return $stats;
    }

    /* ----------------------------------------------------------------- *
     * Changelog (.changes) scrubber — TSV format
     * ----------------------------------------------------------------- */

    /**
     * Process one .changes file.
     *
     * Line format (DokuWiki convention, tab-separated):
     *   timestamp \t ip \t type \t pageid \t user \t summary \t extra \t sizechange \n
     *
     * The IP field is field index 1. We rewrite it to PLACEHOLDER_IP unless it
     * already equals the placeholder (idempotent) or is empty (already scrubbed
     * by an older tool like the GDPR plugin which blanked it).
     *
     * When mutating, io_lock() is held for the full read-modify-write cycle so
     * concurrent changelog appends (which also use io_lock) are serialized.
     *
     * @param string $path
     * @param bool   $mutate false = count lines that would change, true = rewrite
     * @return int   number of lines counted/changed
     * @throws RuntimeException when the file cannot be read or rewritten
     */
    protected function processChangelog($path, $mutate)
    {
        if ($mutate) io_lock($path);
        try {
            $content = file_get_contents($path);
            if ($content === false) {
                throw new RuntimeException('cannot read');
            }

            // Use \n split so we can rejoin without modification. Trailing newline
            // (if any) becomes an empty final element we filter when rebuilding.
            $lines = explode("\n", $content);
            $hadTrailingNewline = ($content !== '' && substr($content, -1) === "\n");
            if ($hadTrailingNewline) array_pop($lines);  // drop the empty tail

            $changed = 0;
            foreach ($lines as $i => $line) {
                if ($line === '') continue;            // skip blank lines in-place
                $fields = explode("\t", $line);
                if (count($fields) < 2) continue;      // malformed; leave alone

                $ip = $fields[1];
                if ($ip === self::PLACEHOLDER_IP) continue;  // already scrubbed
                if (trim($ip) === '') continue;               // already blanked (GDPR-style)

                $fields[1] = self::PLACEHOLDER_IP;
                $lines[$i] = implode("\t", $fields);
                $changed++;
            }

            if ($changed === 0) return 0;
            if (!$mutate) return $changed;

            $newContent = implode("\n", $lines);
            if ($hadTrailingNewline) $newContent .= "\n";

            $this->atomicWrite($path, $newContent);
            return $changed;
        } finally {
            // Runs on every exit path, including exceptions, so the lock is
            // never left behind.
            if ($mutate) io_unlock($path);
        }
    }

    /* ----------------------------------------------------------------- *
     * Page metadata (.meta) scrubber — PHP serialize format
     * ----------------------------------------------------------------- */

    /**
     * Process one .meta file.
     *
     * .meta is a serialize()d ['current' => [...], 'persistent' => [...]]
     * structure (see inc/parserutils.php::p_save_metadata). The IP can live
     * under last_change.ip in either branch.
     *
     * When mutating, io_lock() is held for the full read-modify-write cycle so
     * concurrent metadata saves (which also use io_lock) are serialized.
     *
     * @param string $path
     * @param bool   $mutate
     * @return int   number of ip slots changed (0..2 per file)
     * @throws RuntimeException when the file cannot be read or rewritten
     */
    protected function processMetaFile($path, $mutate)
    {
        if ($mutate) io_lock($path);
        try {
            $raw = file_get_contents($path);
            if ($raw === false) throw new RuntimeException('cannot read');
            if ($raw === '') return 0;

            // allowed_classes => false: never instantiate objects from file content.
            $meta = unserialize($raw, ['allowed_classes' => false]);
            if (!is_array($meta)) return 0;  // corrupt or non-meta - leave alone

            $changed = 0;
            foreach (['current', 'persistent'] as $branch) {
                if (
                    isset($meta[$branch]['last_change']['ip'])
                    && $meta[$branch]['last_change']['ip'] !== self::PLACEHOLDER_IP
                ) {
                    $meta[$branch]['last_change']['ip'] = self::PLACEHOLDER_IP;
                    $changed++;
                }
            }

            if ($changed === 0) return 0;
            if (!$mutate) return $changed;

            $this->atomicWrite($path, serialize($meta));
            return $changed;
        } finally {
            // Runs on every exit path, including exceptions, so the lock is
            // never left behind.
            if ($mutate) io_unlock($path);
        }
    }

    /* ----------------------------------------------------------------- *
     * Safe write helper
     * ----------------------------------------------------------------- */

    /**
     * Write $content to $path atomically, preserving the original mtime.
     *
     * The content is written to a sibling temp file, given the original's
     * permission bits, and then rename()d over $path. The temp file is
     * removed again if the write or the rename fails.
     *
     * The caller must already hold io_lock($path) when mutating to prevent
     * concurrent writes from being lost by the rename.
     *
     * @param string $path
     * @param string $content
     * @return void
     * @throws RuntimeException on any unrecoverable failure
     */
    protected function atomicWrite($path, $content)
    {
        // Capture the mtime before anything touches the file.
        $origMtime = filemtime($path);
        $tmp = $path . '.hideip_tmp_' . bin2hex(random_bytes(self::TMP_SUFFIX_BYTES));

        $ok = file_put_contents($tmp, $content, LOCK_EX);
        if ($ok === false) {
            if (is_file($tmp)) unlink($tmp);
            throw new RuntimeException('failed to write temp file');
        }

        // Copy permissions from the original so the rename doesn't change them.
        $origPerms = fileperms($path);
        if ($origPerms !== false) chmod($tmp, $origPerms & 0777);

        if (!rename($tmp, $path)) {
            if (is_file($tmp)) unlink($tmp);
            throw new RuntimeException('atomic rename failed');
        }

        if ($origMtime !== false) touch($path, $origMtime);
    }

    /* ----------------------------------------------------------------- *
     * Presentation
     * ----------------------------------------------------------------- */

    /**
     * Render the results table for a preview or scrub run.
     *
     * Prints a summary line with the totals, one table row per section, and
     * (only when at least one error was collected) the list of error messages.
     *
     * @param string  $heading  pre-translated heading string
     * @param array[] $results  [section_label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     * @param bool    $wasScrub true = results of a scrub, false = results of a preview
     * @return void
     */
    protected function renderResults($heading, array $results, $wasScrub)
    {
        echo '<h2>' . hsc($heading) . '</h2>';

        $totalFiles = 0;
        $totalLines = 0;
        $totalErrors = 0;
        foreach ($results as $stats) {
            $totalFiles += $stats['files'];
            $totalLines += $stats['lines'];
            $totalErrors += count($stats['errors']);
        }

        if ($wasScrub) {
            echo '<p>' . sprintf($this->getLang('done_summary'), $totalLines, $totalFiles) . '</p>';
        } else {
            echo '<p>' . sprintf($this->getLang('preview_summary'), $totalLines, $totalFiles) . '</p>';
        }

        $colSlots = $wasScrub
            ? $this->getLang('col_slots_rewritten')
            : $this->getLang('col_slots_pending');

        echo '<table class="inline"><thead><tr>'
            . '<th>' . hsc($this->getLang('col_section')) . '</th>'
            . '<th>' . hsc($this->getLang('col_files')) . '</th>'
            . '<th>' . hsc($colSlots) . '</th>'
            . '<th>' . hsc($this->getLang('col_errors')) . '</th>'
            . '</tr></thead><tbody>';
        foreach ($results as $label => $stats) {
            echo '<tr>'
                . '<td>' . hsc($label) . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['files'] . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['lines'] . '</td>'
                . '<td style="text-align:right;">' . count($stats['errors']) . '</td>'
                . '</tr>';
        }
        echo '</tbody></table>';

        if ($totalErrors > 0) {
            echo '<h3>' . hsc($this->getLang('errors_heading')) . '</h3><ul>';
            foreach ($results as $stats) {
                foreach ($stats['errors'] as $err) {
                    // Error text contains file paths and exception messages; escape it.
                    echo '<li><code>' . hsc($err) . '</code></li>';
                }
            }
            echo '</ul>';
        }
    }
}