. */
    public const TMP_SUFFIX_BYTES = 4;

    /**
     * Report that this admin page is for administrators only.
     *
     * @return bool always true
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Sort key of this plugin in the admin menu.
     *
     * @return int always 1000
     */
    public function getMenuSort()
    {
        return 1000;
    }

    /**
     * Label of this plugin in the admin menu.
     *
     * @param string $language not used here; the text comes from getLang('menu')
     * @return string
     */
    public function getMenuText($language)
    {
        return $this->getLang('menu');
    }

    /* -----------------------------------------------------------------
     *
     * Dispatch
     * ----------------------------------------------------------------- */

    /**
     * Result of the last preview run, or null when no preview ran in this request.
     *
     * @var array|null [section label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected $preview = null;

    /**
     * Result of the last scrub run, or null when no scrub ran in this request.
     *
     * @var array|null [section label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected $scrub = null;

    /**
     * Process form submissions (preview and scrub actions).
     *
     * Does nothing unless 'hideip_action' is present, checkSecurityToken()
     * passes, and the action is 'preview' or 'scrub'. The outcome is stored in
     * $this->preview or $this->scrub for html() to render.
     *
     * @return void
     */
    public function handle()
    {
        global $INPUT;
        if (!$INPUT->has('hideip_action')) return;
        // presumably the framework's CSRF token check - TODO confirm
        if (!checkSecurityToken()) return;
        $action = $INPUT->str('hideip_action');
        if ($action !== 'preview' && $action !== 'scrub') return;
        // The mutating action is refused unless the request method is POST;
        // a preview is allowed with any method.
        if ($action === 'scrub' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
            msg($this->getLang('err_post_only'), -1);
            return;
        }
        if ($action === 'preview') {
            $this->preview = $this->runScan(false);
        } else {
            // Defense-in-depth admin re-check (framework already gates via
            // forAdminOnly + isAccessibleByCurrentUser, but the scrub mutates
            // production data; one more check is cheap).
            if (!auth_isadmin()) {
                msg($this->getLang('err_admin_required'), -1);
                return;
            }
            $this->scrub = $this->runScan(true);
        }
    }

    /**
     * Render the admin page.
     *
     * Outputs the menu title, the intro and warning texts, the action form and
     * then the results of whichever action handle() ran in this request.
     *
     * NOTE(review): apart from 'data/attic/', the string literals below hold
     * only newlines or are empty - the surrounding markup looks like it was
     * lost in this copy of the file; compare with the original before editing.
     *
     * @return void
     */
    public function html()
    {
        // Page title (escaped).
        echo '

' . hsc($this->getLang('menu')) . '

';
        // Intro texts. Only the placeholder IP is escaped; the language strings
        // themselves are emitted as-is.
        echo '

' . sprintf($this->getLang('intro_rewrite'), '' . hsc(self::PLACEHOLDER_IP) . '') . '
' . $this->getLang('intro_realtime') . '
' . $this->getLang('intro_preserved') . '

';
        // Warning texts, again emitted without escaping.
        echo '

' . '' . $this->getLang('warn_heading') . '
' . $this->getLang('warn_data') . '
' . sprintf($this->getLang('warn_attic'), 'data/attic/') . '
' . $this->getLang('warn_backup') . '

';
        $this->renderForm();
        if ($this->preview !== null) {
            $this->renderResults($this->getLang('heading_preview'), $this->preview, false);
        }
        if ($this->scrub !== null) {
            $this->renderResults($this->getLang('heading_scrub_done'), $this->scrub, true);
        }
    }

    /* -----------------------------------------------------------------
     *
     * Form
     * ----------------------------------------------------------------- */

    /**
     * Render the preview/scrub action form.
     *
     * Both buttons share the field name 'hideip_action' and differ only in
     * their value ('preview' or 'scrub'), which is what handle() reads.
     *
     * @return void
     */
    protected function renderForm()
    {
        $form = new Form(['method' => 'POST', 'id' => 'hideip_form']);
        // Hidden fields: presumably these route the submission back to this
        // admin page - TODO confirm against the framework.
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'hideip');
        $form->addTagOpen('p');
        $form->addButton('hideip_action', $this->getLang('btn_preview'))->val('preview');
        // Spacer between the two buttons.
        $form->addHTML('    ');
        $form->addButton('hideip_action', $this->getLang('btn_scrub'))->val('scrub');
        $form->addTagClose('p');
        echo $form->toHTML();
    }

    /* -----------------------------------------------------------------
     *
     * Scan/scrub orchestrator
     * ----------------------------------------------------------------- */

    /**
     * Walk all target files and either count IP-bearing entries or rewrite them.
     *
     * Three sections are walked: '.changes' files under $conf['metadir'],
     * '.changes' files under $conf['mediametadir'], and '.meta' files under
     * $conf['metadir'] (so the metadir tree is traversed twice).
     *
     * @param bool $mutate false = preview only, true = rewrite on disk
     * @return array[] [section label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected function runScan($mutate)
    {
        global $conf;
        // Lift the execution time limit and keep running if the client
        // disconnects, where those functions are available.
        if (function_exists('set_time_limit')) set_time_limit(0);
        if (function_exists('ignore_user_abort')) ignore_user_abort(true);
        // Keyed by the translated section label: two sections whose labels
        // resolve to the same string would collapse into one entry.
        $sections = [
            $this->getLang('section_page_changes') => [
                'root' => $conf['metadir'],
                'kind' => 'changes',
            ],
            $this->getLang('section_media_changes') => [
                'root' => $conf['mediametadir'],
                'kind' => 'changes',
            ],
            $this->getLang('section_page_meta') => [
                'root' => $conf['metadir'],
                'kind' => 'meta',
            ],
        ];
        $results = [];
        foreach ($sections as $label => $cfg) {
            $results[$label] = $this->walkSection($cfg['root'], $cfg['kind'], $mutate);
        }
        return $results;
    }

    /**
     * Walk one section root, dispatching each candidate file to the right scrubber.
*
     * Only regular, readable files are considered. For kind 'changes' the file
     * name must end in '.changes' and the file goes to processChangelog(); for
     * kind 'meta' it must end in '.meta' and goes to processMetaFile().
     *
     * Failures never propagate to the caller. A failure while processing one
     * file is recorded against that file and the walk continues. A failure of
     * the directory traversal itself (the iterator cannot be created, or it
     * cannot descend into a subdirectory while advancing) is recorded against
     * $root; the walk stops there and the stats gathered so far are returned.
     *
     * @param string $root   directory to walk recursively
     * @param string $kind   'changes' or 'meta'
     * @param bool   $mutate false = count only, true = rewrite on disk
     * @return array{files:int,lines:int,errors:array}
     */
    protected function walkSection($root, $kind, $mutate)
    {
        $stats = ['files' => 0, 'lines' => 0, 'errors' => []];
        if (!is_dir($root)) return $stats;

        try {
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $root,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::LEAVES_ONLY
            );

            // The foreach must sit inside this try: subdirectories are opened
            // lazily while the iterator advances, so an unreadable directory
            // throws from the loop header, not from the constructor above.
            foreach ($it as $info) {
                // Resolved up front so that every error entry names its file.
                $path = $info->getPathname();
                try {
                    if (!$info->isFile() || !$info->isReadable()) continue;
                    $base = basename($path);

                    // Filter by extension matching the section we're walking.
                    if ($kind === 'changes' && !str_ends_with($base, '.changes')) continue;
                    if ($kind === 'meta' && !str_ends_with($base, '.meta')) continue;

                    $count = ($kind === 'changes')
                        ? $this->processChangelog($path, $mutate)
                        : $this->processMetaFile($path, $mutate);

                    // Only files that contained at least one hit are counted.
                    if ($count > 0) {
                        $stats['files']++;
                        $stats['lines'] += $count;
                    }
                } catch (Exception $e) {
                    // One bad file must not stop the rest of the section.
                    $stats['errors'][] = $path . ': ' . $e->getMessage();
                }
            }
        } catch (Exception $e) {
            // Traversal failure: report it and fall through with partial stats.
            $stats['errors'][] = $root . ': ' . $e->getMessage();
        }

        return $stats;
    }

    /**
     * Whether an IP value needs no action from the scrub.
     *
     * Three cases are exempt:
     *   - the placeholder itself ('0.0.0.0') — already anonymised (idempotent);
     *   - blank — already stripped by an older tool (e.g. the GDPR plugin);
     *   - loopback '127.0.0.1' — DokuWiki hardcodes this as its "external edit"
     *     marker (inc/ChangeLog/ChangeLog.php) whenever a page file's on-disk
     *     mtime no longer matches its changelog. It is re-synthesised on every
     *     view (page metadata) and on the next save (changelog) of such a page,
     *     so rewriting it is a treadmill. It is also a loopback address, not a
     *     real visitor IP, so it leaks nothing. We leave it untouched.
*
     * @param string $ip raw value; surrounding whitespace is ignored
     * @return bool true when the value must be left as it is
     */
    protected function isExemptIp($ip)
    {
        return in_array(
            trim($ip),
            ['', self::PLACEHOLDER_IP, self::LOOPBACK_IP],
            true
        );
    }

    /* -----------------------------------------------------------------
     *
     * Changelog (.changes) scrubber — TSV format
     * ----------------------------------------------------------------- */

    /**
     * Scrub (or count) the IP column of a single .changes file.
     *
     * Each record is one tab-separated line (DokuWiki convention):
     *   timestamp \t ip \t type \t pageid \t user \t summary \t extra \t sizechange \n
     *
     * The second column (index 1) is replaced with PLACEHOLDER_IP unless
     * isExemptIp() says it needs no action. Lines with fewer than two columns,
     * blank lines included, are passed through untouched. A trailing newline
     * is kept if and only if the file had one.
     *
     * In mutate mode io_lock() is taken before the file is read and released
     * only after it has been written back, so that changelog appends (which
     * also use io_lock) cannot interleave with the rewrite.
     *
     * @param string $path
     * @param bool   $mutate false = count lines that would change, true = rewrite
     * @return int number of lines counted/changed
     * @throws RuntimeException when the file cannot be read
     */
    protected function processChangelog($path, $mutate)
    {
        if ($mutate) {
            io_lock($path);
        }

        try {
            $raw = file_get_contents($path);
            if ($raw === false) {
                throw new RuntimeException('cannot read');
            }

            // Remember the final newline so the file can be rebuilt exactly;
            // splitting on it would otherwise leave an empty last element.
            $endsWithNewline = str_ends_with($raw, "\n");
            $rows = explode("\n", $raw);
            if ($endsWithNewline) {
                array_pop($rows);
            }

            $rewritten = 0;
            foreach ($rows as $idx => $row) {
                $cols = explode("\t", $row);

                // A blank or malformed row has no second column: leave it be.
                if (!isset($cols[1])) {
                    continue;
                }

                // Placeholder, blank, or loopback marker: nothing to do.
                if ($this->isExemptIp($cols[1])) {
                    continue;
                }

                $cols[1] = self::PLACEHOLDER_IP;
                $rows[$idx] = implode("\t", $cols);
                $rewritten++;
            }

            // The file is only touched when asked to and when something changed.
            if ($mutate && $rewritten > 0) {
                $rebuilt = implode("\n", $rows) . ($endsWithNewline ? "\n" : '');
                $this->atomicWrite($path, $rebuilt);
            }

            return $rewritten;
        } finally {
            if ($mutate) {
                io_unlock($path);
            }
        }
    }

    /* -----------------------------------------------------------------
     *
     * Page metadata (.meta) scrubber — PHP serialize format
     * ----------------------------------------------------------------- */

    /**
     * Process one .meta file.
     *
     * .meta is a serialize()d ['current' => [...], 'persistent' => [...]]
     * structure (see inc/parserutils.php::p_save_metadata). The IP can live
     * under last_change.ip in either branch.
     *
     * When mutating, io_lock() is held for the full read-modify-write cycle so
     * concurrent metadata saves (which also use io_lock) are serialized.
*
     * @param string $path
     * @param bool   $mutate false = count only, true = rewrite on disk
     * @return int number of ip slots changed (0..2 per file)
     * @throws RuntimeException when the file cannot be read
     */
    protected function processMetaFile($path, $mutate)
    {
        if ($mutate) io_lock($path);
        try {
            $raw = file_get_contents($path);
            if ($raw === false) throw new RuntimeException('cannot read');
            if ($raw === '') return 0;
            // allowed_classes => false: no objects are instantiated from file content.
            $meta = unserialize($raw, ['allowed_classes' => false]);
            if (!is_array($meta)) return 0; // corrupt or non-meta - leave alone
            $changed = 0;
            foreach (['current', 'persistent'] as $branch) {
                // isset() also covers a missing branch or a missing last_change entry.
                if (
                    isset($meta[$branch]['last_change']['ip']) &&
                    !$this->isExemptIp($meta[$branch]['last_change']['ip'])
                ) {
                    $meta[$branch]['last_change']['ip'] = self::PLACEHOLDER_IP;
                    $changed++;
                }
            }
            // Nothing to do, or preview mode: the file is left untouched.
            if ($changed === 0) return 0;
            if (!$mutate) return $changed;
            $this->atomicWrite($path, serialize($meta));
            return $changed;
        } finally {
            if ($mutate) io_unlock($path);
        }
    }

    /* -----------------------------------------------------------------
     *
     * Safe write helper
     * ----------------------------------------------------------------- */

    /**
     * Write $content to $path atomically, preserving the original mtime.
     *
     * The content goes to a temporary file next to $path (suffix
     * '.hideip_tmp_' plus random hex), which is then renamed over $path. The
     * original permission bits are copied to the temporary file first and the
     * original mtime is re-applied afterwards.
     *
     * The caller must already hold io_lock($path) when mutating to prevent
     * concurrent writes from being lost by the rename.
     *
     * @param string $path
     * @param string $content
     * @throws RuntimeException on any unrecoverable failure
     */
    protected function atomicWrite($path, $content)
    {
        // Captured before the rename, which would otherwise give the file a new mtime.
        $origMtime = filemtime($path);
        $tmp = $path . '.hideip_tmp_' . bin2hex(random_bytes(self::TMP_SUFFIX_BYTES));
        $ok = file_put_contents($tmp, $content, LOCK_EX);
        if ($ok === false) {
            // Do not leave a partial temp file behind.
            if (is_file($tmp)) unlink($tmp);
            throw new RuntimeException('failed to write temp file');
        }
        // Copy permissions from the original so the rename doesn't change them.
        $origPerms = fileperms($path);
        if ($origPerms !== false) chmod($tmp, $origPerms & 0777);
        if (!rename($tmp, $path)) {
            if (is_file($tmp)) unlink($tmp);
            throw new RuntimeException('atomic rename failed');
        }
        // Best effort: the return values of chmod() and touch() are not checked.
        if ($origMtime !== false) touch($path, $origMtime);
    }

    /* -----------------------------------------------------------------
     *
     * Presentation
     * ----------------------------------------------------------------- */

    /**
     * Render the results table for a preview or scrub run.
     *
     * Prints the heading, a summary line built from the totals over all
     * sections, the column and row values, and an errors heading when any
     * section reported errors.
     *
     * NOTE(review): the string literals in this method hold only newlines or
     * are empty - the markup looks like it was lost in this copy of the file;
     * compare with the original before editing.
     *
     * @param string  $heading  pre-translated heading string
     * @param array[] $results  [section_label => [files, lines, errors]]
     * @param bool    $wasScrub true for a completed scrub, false for a preview
     * @return void
     */
    protected function renderResults($heading, array $results, $wasScrub)
    {
        echo '

' . hsc($heading) . '

';
        // Totals across all sections for the summary line.
        $totalFiles = 0;
        $totalLines = 0;
        $totalErrors = 0;
        foreach ($results as $stats) {
            $totalFiles += $stats['files'];
            $totalLines += $stats['lines'];
            $totalErrors += count($stats['errors']);
        }
        // Summary text differs by mode; the language string is the sprintf
        // format and is emitted without escaping.
        if ($wasScrub) {
            echo '

' . sprintf($this->getLang('done_summary'), $totalLines, $totalFiles) . '

';
        } else {
            echo '

' . sprintf($this->getLang('preview_summary'), $totalLines, $totalFiles) . '

';
        }
        $colSlots = $wasScrub ? $this->getLang('col_slots_rewritten') : $this->getLang('col_slots_pending');
        // NOTE(review): as written, the two echo statements below output only
        // empty strings, and the column titles and row values are emitted once
        // by the echo after the loop, where $label and $stats hold the values
        // of the last section. Presumably the cell expressions belong inside
        // these statements - confirm against the original file.
        echo '' . '' . '' . '' . '' . '';
        foreach ($results as $label => $stats) {
            echo '' . '' . '' . '' . '' . '';
        }
        echo '
' . hsc($this->getLang('col_section')) . '' . hsc($this->getLang('col_files')) . '' . hsc($colSlots) . '' . hsc($this->getLang('col_errors')) . '
' . hsc($label) . '' . (int)$stats['files'] . '' . (int)$stats['lines'] . '' . count($stats['errors']) . '
';
        // Only the heading is printed here; the individual error messages are
        // not output anywhere in this method as it stands.
        if ($totalErrors > 0) {
            echo '

' . hsc($this->getLang('errors_heading')) . '

';
        }
    }
}