xref: /plugin/hideip/admin.php (revision 679c68affd3cd6e4ceb8699bdffea5948edad6d8)
1<?php
2/**
3 * Hide IP — admin component.
4 *
5 * Admin-only page that walks the historical IP-bearing files DokuWiki has
6 * accumulated and rewrites every IP field with the placeholder used by the
7 * action component. Scope is intentionally narrow:
8 *
9 *   - $conf['metadir']/**.changes        page changelogs (per-page + master)
10 *   - $conf['mediametadir']/**.changes   media changelogs (per-media + master)
11 *   - $conf['metadir']/**.meta           page metadata (last_change.ip)
12 *
13 * NOT touched (per the project's explicit scope):
14 *   - data/attic/, data/media_attic/     historical .gz revision archives
15 *   - data/cache/, data/tmp/, data/log/  ephemeral / regenerated
16 *
17 * Authorship (user field) and timestamps (date field) are preserved; only
18 * the IP field is rewritten. File mtimes are preserved across the rewrite.
19 *
20 * Atomicity: every write goes to a sibling tmp file with a random suffix and
21 * is then rename()d into place. rename() is atomic on a single filesystem,
22 * so a concurrent reader either sees the old file or the new file.
23 *
24 * Idempotent: running scrub twice is a no-op on lines that already hold the
25 * placeholder.
26 */
27
28use dokuwiki\Extension\AdminPlugin;
29use dokuwiki\Form\Form;
30
/**
 * Admin page of the Hide IP plugin.
 *
 * Offers two actions: a read-only preview that counts the IP slots still
 * holding a real value, and a scrub that rewrites those slots to
 * PLACEHOLDER_IP inside changelog (.changes) and page metadata (.meta) files.
 */
class admin_plugin_hideip extends AdminPlugin
{
    /** Mirror of action_plugin_hideip::PLACEHOLDER_IP. Kept inline so this
     *  admin component can run without the action component being loaded. */
    const PLACEHOLDER_IP = '0.0.0.0';

    /** Number of random bytes used for a tmp-file suffix; hex-encoded this
     *  yields 8 characters (".hideip_tmp_<8 hex>"). */
    const TMP_SUFFIX_BYTES = 4;

    /**
     * Restrict this admin page to administrators.
     *
     * @return bool always true
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Sort key of this page in the admin menu.
     *
     * @return int always 1000
     */
    public function getMenuSort()
    {
        return 1000;
    }

    /**
     * Label of this page in the admin menu.
     *
     * @param string $language  requested language; ignored, the label is fixed
     * @return string
     */
    public function getMenuText($language)
    {
        return 'Hide IP';
    }

    /* ----------------------------------------------------------------- *
     *  Dispatch
     * ----------------------------------------------------------------- */

    /**
     * Result of the last preview run, or null when none was requested.
     *
     * @var array|null [section_label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected $preview = null;

    /**
     * Result of the last scrub run, or null when none was requested.
     *
     * @var array|null [section_label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected $scrub = null;

    /**
     * Handle a submitted form: run a preview or a scrub and keep the result
     * for html() to render.
     *
     * Nothing happens unless the request carries a 'hideip_action' value of
     * 'preview' or 'scrub' and passes checkSecurityToken(). A scrub is
     * additionally refused unless the request method is POST and
     * auth_isadmin() confirms the current user.
     */
    public function handle()
    {
        global $INPUT;

        if (!$INPUT->has('hideip_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('hideip_action');
        if ($action !== 'preview' && $action !== 'scrub') return;

        if ($action === 'preview') {
            $this->preview = $this->runScan(false);
            return;
        }

        // From here on the action is 'scrub', which mutates files on disk.
        if (($_SERVER['REQUEST_METHOD'] ?? 'GET') !== 'POST') {
            msg('Hide IP: scrub must be submitted via POST.', -1);
            return;
        }

        // Defense-in-depth admin re-check (framework already gates via
        // forAdminOnly + isAccessibleByCurrentUser, but the scrub mutates
        // production data; one more check is cheap).
        if (!auth_isadmin()) {
            msg('Hide IP: admin access required.', -1);
            return;
        }

        $this->scrub = $this->runScan(true);
    }

    /**
     * Render the page: explanation, destructive-action warning, the form,
     * and the result table of whichever run handle() performed.
     */
    public function html()
    {
        echo '<h1>Hide IP</h1>';
        echo '<p>This page rewrites historical IP addresses on disk to '
            . '<code>' . hsc(self::PLACEHOLDER_IP) . '</code>.<br>New edits are already '
            . 'anonymised by the action component of this plugin (loads on every request).<br>'
            . 'Timestamps and authorship are preserved.</p>';

        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
            . '<strong>This action is destructive.</strong><br>Real IP addresses recorded in '
            . 'page and media changelogs and in page metadata will be replaced and cannot '
            . 'be recovered from these files.<br>The <code>data/attic/</code> revision archives are '
            . 'not modified — if your wiki retains those, IPs from saved revisions remain '
            . 'inside them.<br>Take a backup with the Site Backup plugin first if you want '
            . 'a recovery point.'
            . '</p>';

        $this->renderForm();

        if ($this->preview !== null) {
            $this->renderResults('Preview', $this->preview, false);
        }
        if ($this->scrub !== null) {
            $this->renderResults('Scrub complete', $this->scrub, true);
        }
    }

    /* ----------------------------------------------------------------- *
     *  Form
     * ----------------------------------------------------------------- */

    /**
     * Print the POST form with the two submit buttons. Both buttons share the
     * name 'hideip_action'; the pressed one supplies 'preview' or 'scrub'.
     */
    protected function renderForm()
    {
        $form = new Form(['method' => 'POST', 'id' => 'hideip_form']);

        // Route the submission back to this admin page.
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'hideip');

        $form->addTagOpen('p');
        $form->addButton('hideip_action', 'Preview (count only)')->val('preview');
        $form->addHTML(' &nbsp;&nbsp; ');
        $form->addButton('hideip_action', 'Scrub now')->val('scrub');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /* ----------------------------------------------------------------- *
     *  Scan/scrub orchestrator
     * ----------------------------------------------------------------- */

    /**
     * Walk all target files and either count IP-bearing entries or rewrite them.
     *
     * Lifts the execution time limit and keeps running after a client
     * disconnect, because a walk over a large data directory can be slow.
     *
     * @param bool $mutate  false = preview only, true = rewrite on disk
     * @return array[]      [section_label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected function runScan($mutate)
    {
        global $conf;

        @set_time_limit(0);
        @ignore_user_abort(true);

        // The meta directory is walked twice: once per file kind it contains.
        $sections = [
            'Page changelogs (data/meta/*.changes)' => [
                'root' => $conf['metadir'],
                'kind' => 'changes',
            ],
            'Media changelogs (data/media_meta/*.changes)' => [
                'root' => $conf['mediametadir'],
                'kind' => 'changes',
            ],
            'Page metadata (data/meta/*.meta)' => [
                'root' => $conf['metadir'],
                'kind' => 'meta',
            ],
        ];

        $results = [];
        foreach ($sections as $label => $cfg) {
            $results[$label] = $this->walkSection($cfg['root'], $cfg['kind'], $mutate);
        }
        return $results;
    }

    /**
     * Walk one section root, dispatching each candidate file to the right scrubber.
     *
     * Failures never escape this method; they are collected as
     * "<path>: <message>" strings in the 'errors' list:
     *   - a candidate file that is not readable is reported instead of being
     *     skipped silently, so the operator knows its IPs were left in place;
     *   - an exception raised while processing one file is recorded and the
     *     walk continues with the next file;
     *   - an exception raised by the directory iterator itself (for example
     *     RecursiveDirectoryIterator failing to open a subdirectory) ends the
     *     walk of this section, but the statistics gathered so far are kept.
     *
     * @param string $root    directory to walk recursively
     * @param string $kind    'changes' or 'meta'
     * @param bool   $mutate  false = count only, true = rewrite on disk
     * @return array{files:int,lines:int,errors:array}
     */
    protected function walkSection($root, $kind, $mutate)
    {
        $stats = ['files' => 0, 'lines' => 0, 'errors' => []];

        if (!is_dir($root)) return $stats;

        try {
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $root,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::LEAVES_ONLY
            );
        } catch (Exception $e) {
            $stats['errors'][] = $root . ': ' . $e->getMessage();
            return $stats;
        }

        try {
            foreach ($it as $info) {
                // Resolve the path first so an error below is never attributed
                // to the file handled in a previous iteration.
                $path = $info->getPathname();

                try {
                    if (!$info->isFile()) continue;
                    $base = basename($path);

                    // Filter by extension matching the section we're walking.
                    if ($kind === 'changes' && substr($base, -8) !== '.changes') continue;
                    if ($kind === 'meta'    && substr($base, -5) !== '.meta')    continue;

                    // A candidate we cannot read may still hold IPs: report it.
                    if (!$info->isReadable()) {
                        $stats['errors'][] = $path . ': cannot read';
                        continue;
                    }

                    $count = ($kind === 'changes')
                        ? $this->processChangelog($path, $mutate)
                        : $this->processMetaFile($path, $mutate);

                    if ($count > 0) {
                        $stats['files']++;
                        $stats['lines'] += $count;
                    }
                } catch (Exception $e) {
                    $stats['errors'][] = $path . ': ' . $e->getMessage();
                }
            }
        } catch (Exception $e) {
            // Thrown while advancing the iterator, i.e. outside the per-file
            // handler above. The iterator cannot be resumed, so stop here.
            $stats['errors'][] = $root . ': directory walk aborted: ' . $e->getMessage();
        }

        return $stats;
    }

    /* ----------------------------------------------------------------- *
     *  Changelog (.changes) scrubber — TSV format
     * ----------------------------------------------------------------- */

    /**
     * Process one .changes file.
     *
     * Line format (DokuWiki convention, tab-separated):
     *   timestamp \t ip \t type \t pageid \t user \t summary \t extra \t sizechange \n
     *
     * The IP field is field index 1. We rewrite it to PLACEHOLDER_IP unless it
     * already equals the placeholder (idempotent) or is empty (already scrubbed
     * by an older tool like the GDPR plugin which blanked it). Blank lines and
     * lines with fewer than two fields are left untouched. All other fields,
     * and the presence or absence of a final newline, are preserved.
     *
     * @param string $path
     * @param bool   $mutate  false = count lines that would change, true = rewrite
     * @return int            number of lines counted/changed
     * @throws RuntimeException when the file cannot be read or written
     */
    protected function processChangelog($path, $mutate)
    {
        $content = @file_get_contents($path);
        if ($content === false) {
            throw new RuntimeException('cannot read');
        }

        // Split on \n so the file can be rejoined unmodified. A trailing
        // newline yields an empty last element; drop it here and re-append
        // the newline when rebuilding.
        $hadTrailingNewline = ($content !== '' && substr($content, -1) === "\n");
        $lines = explode("\n", $content);
        if ($hadTrailingNewline) array_pop($lines);

        $changed = 0;
        foreach ($lines as $i => $line) {
            if ($line === '') continue;                 // blank line: keep in place

            $fields = explode("\t", $line);
            if (count($fields) < 2) continue;           // malformed; leave alone

            $ip = $fields[1];
            if ($ip === self::PLACEHOLDER_IP) continue; // already scrubbed
            if (trim($ip) === '') continue;             // already blanked (GDPR-style)

            $fields[1] = self::PLACEHOLDER_IP;
            $lines[$i] = implode("\t", $fields);
            $changed++;
        }

        // Nothing to do, or preview only: never touch the file.
        if ($changed === 0 || !$mutate) return $changed;

        $newContent = implode("\n", $lines);
        if ($hadTrailingNewline) $newContent .= "\n";

        $this->atomicWrite($path, $newContent);
        return $changed;
    }

    /* ----------------------------------------------------------------- *
     *  Page metadata (.meta) scrubber — PHP serialize format
     * ----------------------------------------------------------------- */

    /**
     * Process one .meta file.
     *
     * .meta is a serialize()d ['current' => [...], 'persistent' => [...]]
     * structure (see inc/parserutils.php::p_save_metadata). The IP can live
     * under last_change.ip in either branch. An empty file, or content that
     * does not unserialize to an array, is left alone and counts as 0.
     *
     * @param string $path
     * @param bool   $mutate  false = count slots that would change, true = rewrite
     * @return int   number of ip slots changed (0..2 per file)
     * @throws RuntimeException when the file cannot be read or written
     */
    protected function processMetaFile($path, $mutate)
    {
        $raw = @file_get_contents($path);
        if ($raw === false) throw new RuntimeException('cannot read');
        if ($raw === '')    return 0;

        // Objects are never instantiated (allowed_classes => false); warnings
        // about undecodable data are suppressed and handled by the type check.
        $meta = @unserialize($raw, ['allowed_classes' => false]);
        if (!is_array($meta)) return 0;   // corrupt or non-meta - leave alone

        $changed = 0;
        foreach (['current', 'persistent'] as $branch) {
            if (!isset($meta[$branch]['last_change']['ip'])) continue;
            if ($meta[$branch]['last_change']['ip'] === self::PLACEHOLDER_IP) continue;

            $meta[$branch]['last_change']['ip'] = self::PLACEHOLDER_IP;
            $changed++;
        }

        // Nothing to do, or preview only: never touch the file.
        if ($changed === 0 || !$mutate) return $changed;

        $this->atomicWrite($path, serialize($meta));
        return $changed;
    }

    /* ----------------------------------------------------------------- *
     *  Safe write helper
     * ----------------------------------------------------------------- */

    /**
     * Write $content to $path atomically, preserving the original mtime.
     *
     * The content goes to a sibling temp file named
     * "<path>.hideip_tmp_<random hex>", which is given the original file's
     * permission bits and then rename()d over $path. The temp file is removed
     * again when the write or the rename fails.
     *
     * @param string $path     file to replace
     * @param string $content  new file content
     * @throws RuntimeException on any unrecoverable failure
     */
    protected function atomicWrite($path, $content)
    {
        // Capture the mtime before the file is replaced.
        $origMtime = @filemtime($path);
        $tmp = $path . '.hideip_tmp_' . bin2hex(random_bytes(self::TMP_SUFFIX_BYTES));

        if (@file_put_contents($tmp, $content, LOCK_EX) === false) {
            @unlink($tmp);
            throw new RuntimeException('failed to write temp file');
        }

        // Copy permissions from the original so the rename doesn't change them.
        $origPerms = @fileperms($path);
        if ($origPerms !== false) @chmod($tmp, $origPerms & 0777);

        if (!@rename($tmp, $path)) {
            @unlink($tmp);
            throw new RuntimeException('atomic rename failed');
        }

        // Best effort: a failure to restore the mtime is not treated as an error.
        if ($origMtime !== false) @touch($path, $origMtime);
    }

    /* ----------------------------------------------------------------- *
     *  Presentation
     * ----------------------------------------------------------------- */

    /**
     * Print the outcome of a preview or scrub run: a summary sentence, one
     * table row per section and, if any were recorded, the list of errors.
     *
     * @param string $heading   section heading (escaped before output)
     * @param array  $results   [section_label => ['files', 'lines', 'errors']] as built by runScan()
     * @param bool   $wasScrub  true = word the output as completed work,
     *                          false = word it as a forecast
     */
    protected function renderResults($heading, array $results, $wasScrub)
    {
        echo '<h2>' . hsc($heading) . '</h2>';

        $totalFiles = 0;
        $totalLines = 0;
        $totalErrors = 0;
        foreach ($results as $stats) {
            $totalFiles  += $stats['files'];
            $totalLines  += $stats['lines'];
            $totalErrors += count($stats['errors']);
        }

        if ($wasScrub) {
            echo '<p><strong>Done.</strong> Rewrote ' . (int)$totalLines
                . ' IP slot(s) across ' . (int)$totalFiles . ' file(s).</p>';
        } else {
            echo '<p>Would rewrite ' . (int)$totalLines . ' IP slot(s) across '
                . (int)$totalFiles . ' file(s).</p>';
        }

        echo '<table class="inline"><thead><tr>'
            . '<th>Section</th>'
            . '<th>Files affected</th>'
            . '<th>IP slots ' . ($wasScrub ? 'rewritten' : 'to rewrite') . '</th>'
            . '<th>Errors</th>'
            . '</tr></thead><tbody>';
        foreach ($results as $label => $stats) {
            echo '<tr>'
                . '<td>' . hsc($label) . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['files'] . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['lines'] . '</td>'
                . '<td style="text-align:right;">' . count($stats['errors']) . '</td>'
                . '</tr>';
        }
        echo '</tbody></table>';

        if ($totalErrors > 0) {
            echo '<h3>Errors</h3><ul>';
            foreach ($results as $stats) {
                foreach ($stats['errors'] as $err) {
                    echo '<li><code>' . hsc($err) . '</code></li>';
                }
            }
            echo '</ul>';
        }
    }
}
412