xref: /plugin/hideip/admin.php (revision e16e67b76da65c8320d1ffbb2454fd7dfac6c231)
1<?php
2if (!defined('DOKU_INC')) die();
3
4/**
5 * Hide IP — admin component.
6 *
7 * Admin-only page that walks the historical IP-bearing files DokuWiki has
8 * accumulated and rewrites every IP field with the placeholder used by the
9 * action component. Scope is intentionally narrow:
10 *
11 *   - $conf['metadir']/**.changes        page changelogs (per-page + master)
12 *   - $conf['mediametadir']/**.changes   media changelogs (per-media + master)
13 *   - $conf['metadir']/**.meta           page metadata (last_change.ip)
14 *
15 * NOT touched (per the project's explicit scope):
16 *   - data/attic/, data/media_attic/     historical .gz revision archives
17 *   - data/cache/, data/tmp/, data/log/  ephemeral / regenerated
18 *
19 * Authorship (user field) and timestamps (date field) are preserved; only
20 * the IP field is rewritten. File mtimes are preserved across the rewrite.
21 *
22 * Atomicity: every write goes to a sibling tmp file with a random suffix and
23 * is then rename()d into place. rename() is atomic on a single filesystem,
24 * so a concurrent reader either sees the old file or the new file.
25 *
26 * Concurrency: processChangelog() and processMetaFile() hold io_lock() across
27 * the full read-modify-write cycle when mutating, so concurrent DokuWiki
28 * changelog appends (which also use io_lock) are properly serialized.
29 *
30 * Idempotent: running scrub twice is a no-op on lines that already hold the
31 * placeholder.
32 */
33
34use dokuwiki\Extension\AdminPlugin;
35use dokuwiki\Form\Form;
36
class admin_plugin_hideip extends AdminPlugin
{
    /** Mirror of action_plugin_hideip::PLACEHOLDER_IP. Kept inline so this
     *  admin component can run without the action component being loaded. */
    public const PLACEHOLDER_IP = '0.0.0.0';

    /** DokuWiki's hardcoded "external edit" marker. Not a real visitor IP and
     *  not something this plugin can intercept in real time — see isExemptIp(). */
    public const LOOPBACK_IP = '127.0.0.1';

    /** Number of random bytes in the tmp-file suffix; hex-encoded this gives
     *  .hideip_tmp_<8 hex chars>. */
    public const TMP_SUFFIX_BYTES = 4;

    /**
     * Restrict this admin page to administrators.
     *
     * @return bool always true
     */
    public function forAdminOnly()
    {
        return true;
    }

    /**
     * Sort key of this plugin's entry in the admin menu.
     *
     * @return int
     */
    public function getMenuSort()
    {
        return 1000;
    }

    /**
     * Label of this plugin's entry in the admin menu.
     *
     * @param string $language  unused; the label is resolved through getLang()
     * @return string
     */
    public function getMenuText($language)
    {
        return $this->getLang('menu');
    }

    /* ----------------------------------------------------------------- *
     *  Dispatch
     * ----------------------------------------------------------------- */

    /** @var array|null per-section preview results: [section => [files, lines, errors]] */
    protected $preview = null;

    /** @var array|null per-section scrub results: [section => [files, lines, errors]] */
    protected $scrub = null;

    /**
     * Process form submissions (preview and scrub actions).
     *
     * Nothing happens unless the request carries 'hideip_action' with a valid
     * security token and a value of 'preview' or 'scrub'. A scrub is further
     * required to arrive via POST and to come from an admin. Results are kept
     * in $this->preview / $this->scrub for html() to render.
     *
     * @return void
     */
    public function handle()
    {
        global $INPUT;

        if (!$INPUT->has('hideip_action')) return;
        if (!checkSecurityToken()) return;

        $action = $INPUT->str('hideip_action');
        if ($action !== 'preview' && $action !== 'scrub') return;

        // The scrub rewrites files on disk, so it must never be triggered by a GET.
        if ($action === 'scrub' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
            msg($this->getLang('err_post_only'), -1);
            return;
        }

        if ($action === 'preview') {
            $this->preview = $this->runScan(false);
        } else {
            // Defense-in-depth admin re-check (framework already gates via
            // forAdminOnly + isAccessibleByCurrentUser, but the scrub mutates
            // production data; one more check is cheap).
            if (!auth_isadmin()) {
                msg($this->getLang('err_admin_required'), -1);
                return;
            }
            $this->scrub = $this->runScan(true);
        }
    }

    /**
     * Render the admin page: intro text, warning box, the action form and,
     * when handle() produced them, the preview and/or scrub results.
     *
     * @return void
     */
    public function html()
    {
        echo '<h1>' . hsc($this->getLang('menu')) . '</h1>';
        echo '<p>'
            . sprintf($this->getLang('intro_rewrite'), '<code>' . hsc(self::PLACEHOLDER_IP) . '</code>')
            . '<br>'
            . $this->getLang('intro_realtime')
            . '<br>'
            . $this->getLang('intro_preserved')
            . '</p>';

        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
            . '<strong>' . $this->getLang('warn_heading') . '</strong><br>'
            . $this->getLang('warn_data') . '<br>'
            . sprintf($this->getLang('warn_attic'), '<code>data/attic/</code>') . '<br>'
            . $this->getLang('warn_backup')
            . '</p>';

        $this->renderForm();

        if ($this->preview !== null) {
            $this->renderResults($this->getLang('heading_preview'), $this->preview, false);
        }
        if ($this->scrub !== null) {
            $this->renderResults($this->getLang('heading_scrub_done'), $this->scrub, true);
        }
    }

    /* ----------------------------------------------------------------- *
     *  Form
     * ----------------------------------------------------------------- */

    /**
     * Render the preview/scrub action form.
     *
     * Both buttons share the field name 'hideip_action'; the button that was
     * pressed determines the value handle() receives.
     *
     * @return void
     */
    protected function renderForm()
    {
        $form = new Form(['method' => 'POST', 'id' => 'hideip_form']);
        $form->setHiddenField('do', 'admin');
        $form->setHiddenField('page', 'hideip');

        $form->addTagOpen('p');
        $form->addButton('hideip_action', $this->getLang('btn_preview'))->val('preview');
        $form->addHTML(' &nbsp;&nbsp; ');
        $form->addButton('hideip_action', $this->getLang('btn_scrub'))->val('scrub');
        $form->addTagClose('p');

        echo $form->toHTML();
    }

    /* ----------------------------------------------------------------- *
     *  Scan/scrub orchestrator
     * ----------------------------------------------------------------- */

    /**
     * Walk all target files and either count IP-bearing entries or rewrite them.
     *
     * Sections are keyed by a stable internal id rather than by their
     * translated label: if two labels were identical or empty, label keys
     * would collapse and a section would silently never be scanned. The
     * translated label is still used as the result key; the id is only used
     * as a fallback when the label is empty or already taken.
     *
     * @param bool $mutate  false = preview only, true = rewrite on disk
     * @return array[]      [section_label => [files, lines, errors]]
     */
    protected function runScan($mutate)
    {
        global $conf;

        // A large wiki can take a while; don't let the request be cut short
        // half-way through a scrub.
        if (function_exists('set_time_limit')) set_time_limit(0);
        if (function_exists('ignore_user_abort')) ignore_user_abort(true);

        $sections = [
            'page_changes'  => [
                'label' => $this->getLang('section_page_changes'),
                'root'  => $conf['metadir'],
                'kind'  => 'changes',
            ],
            'media_changes' => [
                'label' => $this->getLang('section_media_changes'),
                'root'  => $conf['mediametadir'],
                'kind'  => 'changes',
            ],
            'page_meta'     => [
                'label' => $this->getLang('section_page_meta'),
                'root'  => $conf['metadir'],
                'kind'  => 'meta',
            ],
        ];

        $results = [];
        foreach ($sections as $id => $cfg) {
            $label = (string)$cfg['label'];
            if ($label === '' || array_key_exists($label, $results)) {
                $label = $id;
            }
            $results[$label] = $this->walkSection($cfg['root'], $cfg['kind'], $mutate);
        }
        return $results;
    }

    /**
     * Walk one section root, dispatching each candidate file to the right scrubber.
     *
     * Failures never abort the run: a problem with a single file is recorded
     * against that file and the walk continues; a problem with the directory
     * traversal itself (e.g. an unreadable sub-directory, which the iterator
     * reports only while advancing) is recorded against the section and ends
     * the walk of this section only.
     *
     * @param string $root
     * @param string $kind    'changes' or 'meta'
     * @param bool   $mutate
     * @return array{files:int,lines:int,errors:array}
     */
    protected function walkSection($root, $kind, $mutate)
    {
        $stats = ['files' => 0, 'lines' => 0, 'errors' => []];

        if (!is_dir($root)) return $stats;

        try {
            $it = new RecursiveIteratorIterator(
                new RecursiveDirectoryIterator(
                    $root,
                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
                ),
                RecursiveIteratorIterator::LEAVES_ONLY
            );

            // The foreach lives inside the try: sub-directories are opened
            // lazily while iterating, so traversal errors surface here, not
            // in the constructor call above.
            foreach ($it as $info) {
                $path = $info->getPathname();
                try {
                    if (!$info->isFile() || !$info->isReadable()) continue;
                    $base = basename($path);

                    // Filter by extension matching the section we're walking.
                    if ($kind === 'changes' && !str_ends_with($base, '.changes')) continue;
                    if ($kind === 'meta'    && !str_ends_with($base, '.meta'))    continue;

                    $count = ($kind === 'changes')
                        ? $this->processChangelog($path, $mutate)
                        : $this->processMetaFile($path, $mutate);

                    if ($count > 0) {
                        $stats['files']++;
                        $stats['lines'] += $count;
                    }
                } catch (Throwable $e) {
                    // Throwable, not just Exception: an Error (e.g. TypeError
                    // on malformed data) in one file must not kill the batch.
                    $stats['errors'][] = $path . ': ' . $e->getMessage();
                }
            }
        } catch (Exception $e) {
            $stats['errors'][] = $root . ': ' . $e->getMessage();
        }
        return $stats;
    }

    /**
     * Whether an IP value needs no action from the scrub.
     *
     * Three cases are exempt:
     *   - the placeholder itself ('0.0.0.0') — already anonymised (idempotent);
     *   - blank — already stripped by an older tool (e.g. the GDPR plugin);
     *   - loopback '127.0.0.1' — DokuWiki hardcodes this as its "external edit"
     *     marker (inc/ChangeLog/ChangeLog.php) whenever a page file's on-disk
     *     mtime no longer matches its changelog. It is re-synthesised on every
     *     view (page metadata) and on the next save (changelog) of such a page,
     *     so rewriting it is a treadmill. It is also a loopback address, not a
     *     real visitor IP, so it leaks nothing. We leave it untouched.
     *
     * Values read from .meta files come out of unserialize() and are not
     * guaranteed to be strings. A non-scalar value (array, object) is never
     * exempt — it is not one of the three recognised values, and passing it
     * to trim() would throw a TypeError on PHP 8.
     *
     * @param mixed $ip  normally a string
     * @return bool
     */
    protected function isExemptIp($ip)
    {
        if ($ip !== null && !is_scalar($ip)) return false;

        $ip = trim((string)$ip);
        return $ip === ''
            || $ip === self::PLACEHOLDER_IP
            || $ip === self::LOOPBACK_IP;
    }

    /* ----------------------------------------------------------------- *
     *  Changelog (.changes) scrubber — TSV format
     * ----------------------------------------------------------------- */

    /**
     * Process one .changes file.
     *
     * Line format (DokuWiki convention, tab-separated):
     *   timestamp \t ip \t type \t pageid \t user \t summary \t extra \t sizechange \n
     *
     * The IP field is field index 1. It is rewritten to PLACEHOLDER_IP unless
     * isExemptIp() says it needs no action: it already equals the placeholder
     * (idempotent), is empty (already scrubbed by an older tool like the GDPR
     * plugin which blanked it), or is the loopback "external edit" marker.
     * Blank lines and lines with fewer than two fields are left untouched.
     *
     * When mutating, io_lock() is held for the full read-modify-write cycle so
     * concurrent changelog appends (which also use io_lock) are serialized.
     *
     * @param string $path
     * @param bool   $mutate  false = count lines that would change, true = rewrite
     * @return int            number of lines counted/changed
     * @throws RuntimeException if the file cannot be read or rewritten
     */
    protected function processChangelog($path, $mutate)
    {
        if ($mutate) io_lock($path);
        try {
            $content = file_get_contents($path);
            if ($content === false) {
                throw new RuntimeException('cannot read');
            }

            // Split on \n so the file can be rejoined byte-for-byte. A trailing
            // newline shows up as an empty final element; drop it here and
            // re-append it after the join.
            $lines = explode("\n", $content);
            $hadTrailingNewline = ($content !== '' && substr($content, -1) === "\n");
            if ($hadTrailingNewline) array_pop($lines);

            $changed = 0;
            foreach ($lines as $i => $line) {
                if ($line === '') continue;                 // skip blank lines in-place
                $fields = explode("\t", $line);
                if (count($fields) < 2) continue;           // malformed; leave alone

                $ip = $fields[1];
                if ($this->isExemptIp($ip)) continue;       // placeholder, blank, or loopback marker

                $fields[1] = self::PLACEHOLDER_IP;
                $lines[$i] = implode("\t", $fields);
                $changed++;
            }

            if ($changed === 0) return 0;
            if (!$mutate) return $changed;

            $newContent = implode("\n", $lines);
            if ($hadTrailingNewline) $newContent .= "\n";

            $this->atomicWrite($path, $newContent);
            return $changed;
        } finally {
            if ($mutate) io_unlock($path);
        }
    }

    /* ----------------------------------------------------------------- *
     *  Page metadata (.meta) scrubber — PHP serialize format
     * ----------------------------------------------------------------- */

    /**
     * Process one .meta file.
     *
     * .meta is a serialize()d ['current' => [...], 'persistent' => [...]]
     * structure (see inc/parserutils.php::p_save_metadata). The IP can live
     * under last_change.ip in either branch. Empty files and files that do
     * not unserialize to an array are left untouched.
     *
     * When mutating, io_lock() is held for the full read-modify-write cycle so
     * concurrent metadata saves (which also use io_lock) are serialized.
     *
     * @param string $path
     * @param bool   $mutate  false = count slots that would change, true = rewrite
     * @return int   number of ip slots changed (0..2 per file)
     * @throws RuntimeException if the file cannot be read or rewritten
     */
    protected function processMetaFile($path, $mutate)
    {
        if ($mutate) io_lock($path);
        try {
            $raw = file_get_contents($path);
            if ($raw === false) throw new RuntimeException('cannot read');
            if ($raw === '')    return 0;

            // allowed_classes=false: never instantiate objects from file content.
            $meta = unserialize($raw, ['allowed_classes' => false]);
            if (!is_array($meta)) return 0;   // corrupt or non-meta - leave alone

            $changed = 0;
            foreach (['current', 'persistent'] as $branch) {
                if (
                    isset($meta[$branch]['last_change']['ip'])
                    && !$this->isExemptIp($meta[$branch]['last_change']['ip'])
                ) {
                    $meta[$branch]['last_change']['ip'] = self::PLACEHOLDER_IP;
                    $changed++;
                }
            }

            if ($changed === 0) return 0;
            if (!$mutate) return $changed;

            $this->atomicWrite($path, serialize($meta));
            return $changed;
        } finally {
            if ($mutate) io_unlock($path);
        }
    }

    /* ----------------------------------------------------------------- *
     *  Safe write helper
     * ----------------------------------------------------------------- */

    /**
     * Write $content to $path atomically, preserving the original mtime and
     * permission bits.
     *
     * The content goes to a sibling tmp file first, which is then rename()d
     * over $path. The caller must already hold io_lock($path) when mutating
     * to prevent concurrent writes from being lost by the rename.
     *
     * @param string $path
     * @param string $content
     * @throws RuntimeException on any unrecoverable failure
     */
    protected function atomicWrite($path, $content)
    {
        // filemtime()/fileperms() are answered from PHP's stat cache; drop any
        // entry for this path so the values we preserve are the current ones.
        clearstatcache(true, $path);
        $origMtime = filemtime($path);
        $origPerms = fileperms($path);

        $tmp = $path . '.hideip_tmp_' . bin2hex(random_bytes(self::TMP_SUFFIX_BYTES));

        $ok = file_put_contents($tmp, $content, LOCK_EX);
        if ($ok === false) {
            if (is_file($tmp)) unlink($tmp);
            throw new RuntimeException('failed to write temp file');
        }

        // Copy permissions from the original so the rename doesn't change them.
        if ($origPerms !== false) chmod($tmp, $origPerms & 0777);

        if (!rename($tmp, $path)) {
            if (is_file($tmp)) unlink($tmp);
            throw new RuntimeException('atomic rename failed');
        }

        // The rename gave the file a fresh mtime; put the original one back.
        if ($origMtime !== false) touch($path, $origMtime);
    }

    /* ----------------------------------------------------------------- *
     *  Presentation
     * ----------------------------------------------------------------- */

    /**
     * Render the results of a preview or scrub run: a summary sentence, a
     * per-section table and, if any were recorded, the list of errors.
     *
     * @param string  $heading    pre-translated heading string
     * @param array[] $results    [section_label => [files, lines, errors]]
     * @param bool    $wasScrub   true = results of a scrub, false = of a preview
     * @return void
     */
    protected function renderResults($heading, array $results, $wasScrub)
    {
        echo '<h2>' . hsc($heading) . '</h2>';

        $totalFiles  = 0;
        $totalLines  = 0;
        $totalErrors = 0;
        foreach ($results as $stats) {
            $totalFiles  += $stats['files'];
            $totalLines  += $stats['lines'];
            $totalErrors += count($stats['errors']);
        }

        if ($wasScrub) {
            echo '<p>' . sprintf($this->getLang('done_summary'), $totalLines, $totalFiles) . '</p>';
        } else {
            echo '<p>' . sprintf($this->getLang('preview_summary'), $totalLines, $totalFiles) . '</p>';
        }

        $colSlots = $wasScrub
            ? $this->getLang('col_slots_rewritten')
            : $this->getLang('col_slots_pending');

        echo '<table class="inline"><thead><tr>'
            . '<th>' . hsc($this->getLang('col_section')) . '</th>'
            . '<th>' . hsc($this->getLang('col_files')) . '</th>'
            . '<th>' . hsc($colSlots) . '</th>'
            . '<th>' . hsc($this->getLang('col_errors')) . '</th>'
            . '</tr></thead><tbody>';
        foreach ($results as $label => $stats) {
            echo '<tr>'
                . '<td>' . hsc($label) . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['files'] . '</td>'
                . '<td style="text-align:right;">' . (int)$stats['lines'] . '</td>'
                . '<td style="text-align:right;">' . count($stats['errors']) . '</td>'
                . '</tr>';
        }
        echo '</tbody></table>';

        if ($totalErrors > 0) {
            echo '<h3>' . hsc($this->getLang('errors_heading')) . '</h3><ul>';
            foreach ($results as $stats) {
                foreach ($stats['errors'] as $err) {
                    echo '<li><code>' . hsc($err) . '</code></li>';
                }
            }
            echo '</ul>';
        }
    }
}
504