xref: /plugin/hideip/admin.php (revision 047cf1274130bf2cc2a8d4fd101f0662c03da350)
1<?php
2if (!defined('DOKU_INC')) die();
3
4/**
5 * Hide IP — admin component.
6 *
7 * Admin-only page that walks the historical IP-bearing files DokuWiki has
8 * accumulated and rewrites every IP field with the placeholder used by the
9 * action component. Scope is intentionally narrow:
10 *
11 *   - $conf['metadir']/**.changes        page changelogs (per-page + master)
12 *   - $conf['mediametadir']/**.changes   media changelogs (per-media + master)
13 *   - $conf['metadir']/**.meta           page metadata (last_change.ip)
14 *
15 * NOT touched (per the project's explicit scope):
16 *   - data/attic/, data/media_attic/     historical .gz revision archives
17 *   - data/cache/, data/tmp/, data/log/  ephemeral / regenerated
18 *
19 * Authorship (user field) and timestamps (date field) are preserved; only
20 * the IP field is rewritten. File mtimes are preserved across the rewrite.
21 *
22 * Atomicity: every write goes to a sibling tmp file with a random suffix and
23 * is then rename()d into place. rename() is atomic on a single filesystem,
24 * so a concurrent reader either sees the old file or the new file.
25 *
26 * Idempotent: running scrub twice is a no-op on lines that already hold the
27 * placeholder.
28 */
29
30use dokuwiki\Extension\AdminPlugin;
31use dokuwiki\Form\Form;
32
33class admin_plugin_hideip extends AdminPlugin
34{
    /** Mirror of action_plugin_hideip::PLACEHOLDER_IP. Kept inline so this
     *  admin component can run without the action component being loaded.
     *  Every scrubbed IP slot is overwritten with this value. */
    public const PLACEHOLDER_IP = '0.0.0.0';

    /** Number of random bytes used for the tmp-file suffix. The bytes are
     *  hex-encoded, so 4 bytes yield `.hideip_tmp_<8 hex chars>`. */
    public const TMP_SUFFIX_BYTES = 4;
41
42    /**
43     * @return bool
44     */
45    public function forAdminOnly()
46    {
47        return true;
48    }
49
50    /**
51     * @return int
52     */
53    public function getMenuSort()
54    {
55        return 1000;
56    }
57
58    /**
59     * @param string $language
60     * @return string
61     */
62    public function getMenuText($language)
63    {
64        return $this->getLang('menu');
65    }
66
67    /* ----------------------------------------------------------------- *
68     *  Dispatch
69     * ----------------------------------------------------------------- */
70
    /**
     * @var array[]|null Result of the last preview run, keyed by section
     *      label; each entry is ['files' => int, 'lines' => int,
     *      'errors' => string[]]. Null until handle() has run a preview.
     */
    protected $preview = null;

    /**
     * @var array[]|null Result of the last scrub run, same shape as $preview
     *      (section label => ['files', 'lines', 'errors']). Null until
     *      handle() has run a scrub.
     */
    protected $scrub = null;
76
77    /**
78     * Process form submissions (preview and scrub actions).
79     *
80     * @return void
81     */
82    public function handle()
83    {
84        global $INPUT;
85
86        if (!$INPUT->has('hideip_action')) return;
87        if (!checkSecurityToken()) return;
88
89        $action = $INPUT->str('hideip_action');
90        if ($action !== 'preview' && $action !== 'scrub') return;
91
92        if ($action === 'scrub' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
93            msg('Hide IP: scrub must be submitted via POST.', -1);
94            return;
95        }
96
97        if ($action === 'preview') {
98            $this->preview = $this->runScan(false);
99        } else {
100            // Defense-in-depth admin re-check (framework already gates via
101            // forAdminOnly + isAccessibleByCurrentUser, but the scrub mutates
102            // production data; one more check is cheap).
103            if (!auth_isadmin()) {
104                msg('Hide IP: admin access required.', -1);
105                return;
106            }
107            $this->scrub = $this->runScan(true);
108        }
109    }
110
111    /**
112     * Render the admin page.
113     *
114     * @return void
115     */
116    public function html()
117    {
118        echo '<h1>Hide IP</h1>';
119        echo '<p>This page rewrites historical IP addresses on disk to '
120            . '<code>' . hsc(self::PLACEHOLDER_IP) . '</code>.<br>New edits are already '
121            . 'anonymised by the action component of this plugin (loads on every request).<br>'
122            . 'Timestamps and authorship are preserved.</p>';
123
124        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
125            . '<strong>This action is destructive.</strong><br>Real IP addresses recorded in '
126            . 'page and media changelogs and in page metadata will be replaced and cannot '
127            . 'be recovered from these files.<br>The <code>data/attic/</code> revision archives are '
128            . 'not modified — if your wiki retains those, IPs from saved revisions remain '
129            . 'inside them.<br>Take a backup with the Site Backup plugin first if you want '
130            . 'a recovery point.'
131            . '</p>';
132
133        $this->renderForm();
134
135        if ($this->preview !== null) {
136            $this->renderResults('Preview', $this->preview, false);
137        }
138        if ($this->scrub !== null) {
139            $this->renderResults('Scrub complete', $this->scrub, true);
140        }
141    }
142
143    /* ----------------------------------------------------------------- *
144     *  Form
145     * ----------------------------------------------------------------- */
146
147    /**
148     * Render the preview/scrub action form.
149     *
150     * @return void
151     */
152    protected function renderForm()
153    {
154        $form = new Form(['method' => 'POST', 'id' => 'hideip_form']);
155        $form->setHiddenField('do', 'admin');
156        $form->setHiddenField('page', 'hideip');
157
158        $form->addTagOpen('p');
159        $form->addButton('hideip_action', 'Preview (count only)')->val('preview');
160        $form->addHTML(' &nbsp;&nbsp; ');
161        $form->addButton('hideip_action', 'Scrub now')->val('scrub');
162        $form->addTagClose('p');
163
164        echo $form->toHTML();
165    }
166
167    /* ----------------------------------------------------------------- *
168     *  Scan/scrub orchestrator
169     * ----------------------------------------------------------------- */
170
171    /**
172     * Walk all target files and either count IP-bearing entries or rewrite them.
173     *
174     * @param bool $mutate  false = preview only, true = rewrite on disk
175     * @return array[]      [section_label => [files, lines, errors]]
176     */
177    protected function runScan($mutate)
178    {
179        global $conf;
180
181        if (function_exists('set_time_limit')) set_time_limit(0);
182        if (function_exists('ignore_user_abort')) ignore_user_abort(true);
183
184        $sections = [
185            'Page changelogs (data/meta/*.changes)' => [
186                'root' => $conf['metadir'],
187                'kind' => 'changes',
188            ],
189            'Media changelogs (data/media_meta/*.changes)' => [
190                'root' => $conf['mediametadir'],
191                'kind' => 'changes',
192            ],
193            'Page metadata (data/meta/*.meta)' => [
194                'root' => $conf['metadir'],
195                'kind' => 'meta',
196            ],
197        ];
198
199        $results = [];
200        foreach ($sections as $label => $cfg) {
201            $results[$label] = $this->walkSection($cfg['root'], $cfg['kind'], $mutate);
202        }
203        return $results;
204    }
205
206    /**
207     * Walk one section root, dispatching each candidate file to the right scrubber.
208     *
209     * @return array{files:int,lines:int,errors:array}
210     */
211    protected function walkSection($root, $kind, $mutate)
212    {
213        $stats = ['files' => 0, 'lines' => 0, 'errors' => []];
214
215        if (!is_dir($root)) return $stats;
216
217        try {
218            $it = new RecursiveIteratorIterator(
219                new RecursiveDirectoryIterator(
220                    $root,
221                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
222                ),
223                RecursiveIteratorIterator::LEAVES_ONLY
224            );
225        } catch (Exception $e) {
226            $stats['errors'][] = $root . ': ' . $e->getMessage();
227            return $stats;
228        }
229
230        foreach ($it as $info) {
231            try {
232                if (!$info->isFile() || !$info->isReadable()) continue;
233                $path = $info->getPathname();
234                $base = basename($path);
235
236                // Filter by extension matching the section we're walking.
237                if ($kind === 'changes' && !str_ends_with($base, '.changes')) continue;
238                if ($kind === 'meta'    && !str_ends_with($base, '.meta'))    continue;
239
240                $count = ($kind === 'changes')
241                    ? $this->processChangelog($path, $mutate)
242                    : $this->processMetaFile($path, $mutate);
243
244                if ($count > 0) {
245                    $stats['files']++;
246                    $stats['lines'] += $count;
247                }
248            } catch (Exception $e) {
249                $stats['errors'][] = ($path ?? '?') . ': ' . $e->getMessage();
250            }
251        }
252        return $stats;
253    }
254
255    /* ----------------------------------------------------------------- *
256     *  Changelog (.changes) scrubber — TSV format
257     * ----------------------------------------------------------------- */
258
259    /**
260     * Process one .changes file.
261     *
262     * Line format (DokuWiki convention, tab-separated):
263     *   timestamp \t ip \t type \t pageid \t user \t summary \t extra \t sizechange \n
264     *
265     * The IP field is field index 1. We rewrite it to PLACEHOLDER_IP unless it
266     * already equals the placeholder (idempotent) or is empty (already scrubbed
267     * by an older tool like the GDPR plugin which blanked it).
268     *
269     * @param string $path
270     * @param bool   $mutate  false = count lines that would change, true = rewrite
271     * @return int            number of lines counted/changed
272     */
273    protected function processChangelog($path, $mutate)
274    {
275        $content = file_get_contents($path);
276        if ($content === false) {
277            throw new RuntimeException('cannot read');
278        }
279
280        // Use \n split so we can rejoin without modification. Trailing newline
281        // (if any) becomes an empty final element we filter when rebuilding.
282        $lines = explode("\n", $content);
283        $hadTrailingNewline = ($content !== '' && substr($content, -1) === "\n");
284        if ($hadTrailingNewline) array_pop($lines);   // drop the empty tail
285
286        $changed = 0;
287        foreach ($lines as $i => $line) {
288            if ($line === '') continue;                 // skip blank lines in-place
289            $fields = explode("\t", $line);
290            if (count($fields) < 2) continue;           // malformed; leave alone
291
292            $ip = $fields[1];
293            if ($ip === self::PLACEHOLDER_IP) continue; // already scrubbed
294            if (trim($ip) === '') continue;             // already blanked (GDPR-style)
295
296            $fields[1] = self::PLACEHOLDER_IP;
297            $lines[$i] = implode("\t", $fields);
298            $changed++;
299        }
300
301        if ($changed === 0) return 0;
302        if (!$mutate)       return $changed;
303
304        $newContent = implode("\n", $lines);
305        if ($hadTrailingNewline) $newContent .= "\n";
306
307        $this->atomicWrite($path, $newContent);
308        return $changed;
309    }
310
311    /* ----------------------------------------------------------------- *
312     *  Page metadata (.meta) scrubber — PHP serialize format
313     * ----------------------------------------------------------------- */
314
315    /**
316     * Process one .meta file.
317     *
318     * .meta is a serialize()d ['current' => [...], 'persistent' => [...]]
319     * structure (see inc/parserutils.php::p_save_metadata). The IP can live
320     * under last_change.ip in either branch.
321     *
322     * @param string $path
323     * @param bool   $mutate
324     * @return int   number of ip slots changed (0..2 per file)
325     */
326    protected function processMetaFile($path, $mutate)
327    {
328        $raw = file_get_contents($path);
329        if ($raw === false) throw new RuntimeException('cannot read');
330        if ($raw === '')    return 0;
331
332        $meta = unserialize($raw, ['allowed_classes' => false]);
333        if (!is_array($meta)) return 0;   // corrupt or non-meta - leave alone
334
335        $changed = 0;
336        foreach (['current', 'persistent'] as $branch) {
337            if (
338                isset($meta[$branch]['last_change']['ip'])
339                && $meta[$branch]['last_change']['ip'] !== self::PLACEHOLDER_IP
340            ) {
341                $meta[$branch]['last_change']['ip'] = self::PLACEHOLDER_IP;
342                $changed++;
343            }
344        }
345
346        if ($changed === 0) return 0;
347        if (!$mutate)       return $changed;
348
349        $this->atomicWrite($path, serialize($meta));
350        return $changed;
351    }
352
353    /* ----------------------------------------------------------------- *
354     *  Safe write helper
355     * ----------------------------------------------------------------- */
356
357    /**
358     * Write $content to $path atomically, preserving the original mtime.
359     *
360     * @throws RuntimeException on any unrecoverable failure
361     */
362    protected function atomicWrite($path, $content)
363    {
364        $origMtime = filemtime($path);
365        $tmp = $path . '.hideip_tmp_' . bin2hex(random_bytes(self::TMP_SUFFIX_BYTES));
366
367        $ok = file_put_contents($tmp, $content, LOCK_EX);
368        if ($ok === false) {
369            @unlink($tmp);
370            throw new RuntimeException('failed to write temp file');
371        }
372
373        // Copy permissions from the original so the rename doesn't change them.
374        $origPerms = fileperms($path);
375        if ($origPerms !== false) chmod($tmp, $origPerms & 0777);
376
377        if (!rename($tmp, $path)) {
378            @unlink($tmp);
379            throw new RuntimeException('atomic rename failed');
380        }
381
382        if ($origMtime !== false) touch($path, $origMtime);
383    }
384
385    /* ----------------------------------------------------------------- *
386     *  Presentation
387     * ----------------------------------------------------------------- */
388
389    /**
390     * Render the results table for a preview or scrub run.
391     *
392     * @param string  $heading
393     * @param array[] $results   [section_label => [files, lines, errors]]
394     * @param bool    $wasScrub
395     * @return void
396     */
397    protected function renderResults($heading, array $results, $wasScrub)
398    {
399        echo '<h2>' . hsc($heading) . '</h2>';
400
401        $totalFiles = 0;
402        $totalLines = 0;
403        $totalErrors = 0;
404        foreach ($results as $stats) {
405            $totalFiles  += $stats['files'];
406            $totalLines  += $stats['lines'];
407            $totalErrors += count($stats['errors']);
408        }
409
410        if ($wasScrub) {
411            echo '<p><strong>Done.</strong> Rewrote ' . (int)$totalLines
412                . ' IP slot(s) across ' . (int)$totalFiles . ' file(s).</p>';
413        } else {
414            echo '<p>Would rewrite ' . (int)$totalLines . ' IP slot(s) across '
415                . (int)$totalFiles . ' file(s).</p>';
416        }
417
418        echo '<table class="inline"><thead><tr>'
419            . '<th>Section</th>'
420            . '<th>Files affected</th>'
421            . '<th>IP slots ' . ($wasScrub ? 'rewritten' : 'to rewrite') . '</th>'
422            . '<th>Errors</th>'
423            . '</tr></thead><tbody>';
424        foreach ($results as $label => $stats) {
425            echo '<tr>'
426                . '<td>' . hsc($label) . '</td>'
427                . '<td style="text-align:right;">' . (int)$stats['files'] . '</td>'
428                . '<td style="text-align:right;">' . (int)$stats['lines'] . '</td>'
429                . '<td style="text-align:right;">' . count($stats['errors']) . '</td>'
430                . '</tr>';
431        }
432        echo '</tbody></table>';
433
434        if ($totalErrors > 0) {
435            echo '<h3>Errors</h3><ul>';
436            foreach ($results as $stats) {
437                foreach ($stats['errors'] as $err) {
438                    echo '<li><code>' . hsc($err) . '</code></li>';
439                }
440            }
441            echo '</ul>';
442        }
443    }
444}
445