xref: /plugin/hideip/admin.php (revision 4c7d65e99da3c1301827c480a6705f5e5b6083b7)
1<?php
2if (!defined('DOKU_INC')) die();
3
4/**
5 * Hide IP — admin component.
6 *
7 * Admin-only page that walks the historical IP-bearing files DokuWiki has
8 * accumulated and rewrites every IP field with the placeholder used by the
9 * action component. Scope is intentionally narrow:
10 *
11 *   - $conf['metadir']/**.changes        page changelogs (per-page + master)
12 *   - $conf['mediametadir']/**.changes   media changelogs (per-media + master)
13 *   - $conf['metadir']/**.meta           page metadata (last_change.ip)
14 *
15 * NOT touched (per the project's explicit scope):
16 *   - data/attic/, data/media_attic/     historical .gz revision archives
17 *   - data/cache/, data/tmp/, data/log/  ephemeral / regenerated
18 *
19 * Authorship (user field) and timestamps (date field) are preserved; only
20 * the IP field is rewritten. File mtimes are preserved across the rewrite.
21 *
22 * Atomicity: every write goes to a sibling tmp file with a random suffix and
23 * is then rename()d into place. rename() is atomic on a single filesystem,
24 * so a concurrent reader either sees the old file or the new file.
25 *
26 * Concurrency: processChangelog() and processMetaFile() hold io_lock() across
27 * the full read-modify-write cycle when mutating, so concurrent DokuWiki
28 * changelog appends (which also use io_lock) are properly serialized.
29 *
30 * Idempotent: running scrub twice is a no-op on lines that already hold the
31 * placeholder.
32 */
33
34use dokuwiki\Extension\AdminPlugin;
35use dokuwiki\Form\Form;
36
37class admin_plugin_hideip extends AdminPlugin
38{
    /**
     * Value written over every IP field this component scrubs.
     *
     * Mirror of action_plugin_hideip::PLACEHOLDER_IP. Kept inline so this
     * admin component can run without the action component being loaded.
     */
    public const PLACEHOLDER_IP = '0.0.0.0';

    /**
     * Number of random bytes used for a tmp-file suffix. atomicWrite()
     * hex-encodes them, so the suffix on disk is twice as many characters:
     * .hideip_tmp_<8 hex>.
     */
    public const TMP_SUFFIX_BYTES = 4;
45
46    /**
47     * @return bool
48     */
49    public function forAdminOnly()
50    {
51        return true;
52    }
53
54    /**
55     * @return int
56     */
57    public function getMenuSort()
58    {
59        return 1000;
60    }
61
62    /**
63     * @param string $language
64     * @return string
65     */
66    public function getMenuText($language)
67    {
68        return $this->getLang('menu');
69    }
70
71    /* ----------------------------------------------------------------- *
72     *  Dispatch
73     * ----------------------------------------------------------------- */
74
    /**
     * Result of the preview run requested in handle(), or null when no
     * preview was performed in this request.
     *
     * @var array|null [section label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected $preview = null;

    /**
     * Result of the scrub run requested in handle(), or null when no scrub
     * was performed in this request.
     *
     * @var array|null [section label => ['files' => int, 'lines' => int, 'errors' => string[]]]
     */
    protected $scrub = null;
80
81    /**
82     * Process form submissions (preview and scrub actions).
83     *
84     * @return void
85     */
86    public function handle()
87    {
88        global $INPUT;
89
90        if (!$INPUT->has('hideip_action')) return;
91        if (!checkSecurityToken()) return;
92
93        $action = $INPUT->str('hideip_action');
94        if ($action !== 'preview' && $action !== 'scrub') return;
95
96        if ($action === 'scrub' && $INPUT->server->str('REQUEST_METHOD', 'GET') !== 'POST') {
97            msg($this->getLang('err_post_only'), -1);
98            return;
99        }
100
101        if ($action === 'preview') {
102            $this->preview = $this->runScan(false);
103        } else {
104            // Defense-in-depth admin re-check (framework already gates via
105            // forAdminOnly + isAccessibleByCurrentUser, but the scrub mutates
106            // production data; one more check is cheap).
107            if (!auth_isadmin()) {
108                msg($this->getLang('err_admin_required'), -1);
109                return;
110            }
111            $this->scrub = $this->runScan(true);
112        }
113    }
114
115    /**
116     * Render the admin page.
117     *
118     * @return void
119     */
120    public function html()
121    {
122        echo '<h1>' . hsc($this->getLang('menu')) . '</h1>';
123        echo '<p>'
124            . sprintf($this->getLang('intro_rewrite'), '<code>' . hsc(self::PLACEHOLDER_IP) . '</code>')
125            . '<br>'
126            . $this->getLang('intro_realtime')
127            . '<br>'
128            . $this->getLang('intro_preserved')
129            . '</p>';
130
131        echo '<p style="background:#fff3cd; border:1px solid #ffeeba; padding:8px; border-radius:4px;">'
132            . '<strong>' . $this->getLang('warn_heading') . '</strong><br>'
133            . $this->getLang('warn_data') . '<br>'
134            . sprintf($this->getLang('warn_attic'), '<code>data/attic/</code>') . '<br>'
135            . $this->getLang('warn_backup')
136            . '</p>';
137
138        $this->renderForm();
139
140        if ($this->preview !== null) {
141            $this->renderResults($this->getLang('heading_preview'), $this->preview, false);
142        }
143        if ($this->scrub !== null) {
144            $this->renderResults($this->getLang('heading_scrub_done'), $this->scrub, true);
145        }
146    }
147
148    /* ----------------------------------------------------------------- *
149     *  Form
150     * ----------------------------------------------------------------- */
151
152    /**
153     * Render the preview/scrub action form.
154     *
155     * @return void
156     */
157    protected function renderForm()
158    {
159        $form = new Form(['method' => 'POST', 'id' => 'hideip_form']);
160        $form->setHiddenField('do', 'admin');
161        $form->setHiddenField('page', 'hideip');
162
163        $form->addTagOpen('p');
164        $form->addButton('hideip_action', $this->getLang('btn_preview'))->val('preview');
165        $form->addHTML(' &nbsp;&nbsp; ');
166        $form->addButton('hideip_action', $this->getLang('btn_scrub'))->val('scrub');
167        $form->addTagClose('p');
168
169        echo $form->toHTML();
170    }
171
172    /* ----------------------------------------------------------------- *
173     *  Scan/scrub orchestrator
174     * ----------------------------------------------------------------- */
175
176    /**
177     * Walk all target files and either count IP-bearing entries or rewrite them.
178     *
179     * @param bool $mutate  false = preview only, true = rewrite on disk
180     * @return array[]      [section_label => [files, lines, errors]]
181     */
182    protected function runScan($mutate)
183    {
184        global $conf;
185
186        if (function_exists('set_time_limit')) set_time_limit(0);
187        if (function_exists('ignore_user_abort')) ignore_user_abort(true);
188
189        $sections = [
190            $this->getLang('section_page_changes')  => [
191                'root' => $conf['metadir'],
192                'kind' => 'changes',
193            ],
194            $this->getLang('section_media_changes') => [
195                'root' => $conf['mediametadir'],
196                'kind' => 'changes',
197            ],
198            $this->getLang('section_page_meta')     => [
199                'root' => $conf['metadir'],
200                'kind' => 'meta',
201            ],
202        ];
203
204        $results = [];
205        foreach ($sections as $label => $cfg) {
206            $results[$label] = $this->walkSection($cfg['root'], $cfg['kind'], $mutate);
207        }
208        return $results;
209    }
210
211    /**
212     * Walk one section root, dispatching each candidate file to the right scrubber.
213     *
214     * @param string $root
215     * @param string $kind    'changes' or 'meta'
216     * @param bool   $mutate
217     * @return array{files:int,lines:int,errors:array}
218     */
219    protected function walkSection($root, $kind, $mutate)
220    {
221        $stats = ['files' => 0, 'lines' => 0, 'errors' => []];
222
223        if (!is_dir($root)) return $stats;
224
225        try {
226            $it = new RecursiveIteratorIterator(
227                new RecursiveDirectoryIterator(
228                    $root,
229                    FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
230                ),
231                RecursiveIteratorIterator::LEAVES_ONLY
232            );
233        } catch (Exception $e) {
234            $stats['errors'][] = $root . ': ' . $e->getMessage();
235            return $stats;
236        }
237
238        foreach ($it as $info) {
239            $path = '?';
240            try {
241                if (!$info->isFile() || !$info->isReadable()) continue;
242                $path = $info->getPathname();
243                $base = basename($path);
244
245                // Filter by extension matching the section we're walking.
246                if ($kind === 'changes' && !str_ends_with($base, '.changes')) continue;
247                if ($kind === 'meta'    && !str_ends_with($base, '.meta'))    continue;
248
249                $count = ($kind === 'changes')
250                    ? $this->processChangelog($path, $mutate)
251                    : $this->processMetaFile($path, $mutate);
252
253                if ($count > 0) {
254                    $stats['files']++;
255                    $stats['lines'] += $count;
256                }
257            } catch (Exception $e) {
258                $stats['errors'][] = $path . ': ' . $e->getMessage();
259            }
260        }
261        return $stats;
262    }
263
264    /* ----------------------------------------------------------------- *
265     *  Changelog (.changes) scrubber — TSV format
266     * ----------------------------------------------------------------- */
267
268    /**
269     * Process one .changes file.
270     *
271     * Line format (DokuWiki convention, tab-separated):
272     *   timestamp \t ip \t type \t pageid \t user \t summary \t extra \t sizechange \n
273     *
274     * The IP field is field index 1. We rewrite it to PLACEHOLDER_IP unless it
275     * already equals the placeholder (idempotent) or is empty (already scrubbed
276     * by an older tool like the GDPR plugin which blanked it).
277     *
278     * When mutating, io_lock() is held for the full read-modify-write cycle so
279     * concurrent changelog appends (which also use io_lock) are serialized.
280     *
281     * @param string $path
282     * @param bool   $mutate  false = count lines that would change, true = rewrite
283     * @return int            number of lines counted/changed
284     */
285    protected function processChangelog($path, $mutate)
286    {
287        if ($mutate) io_lock($path);
288        try {
289            $content = file_get_contents($path);
290            if ($content === false) {
291                throw new RuntimeException('cannot read');
292            }
293
294            // Use \n split so we can rejoin without modification. Trailing newline
295            // (if any) becomes an empty final element we filter when rebuilding.
296            $lines = explode("\n", $content);
297            $hadTrailingNewline = ($content !== '' && substr($content, -1) === "\n");
298            if ($hadTrailingNewline) array_pop($lines);   // drop the empty tail
299
300            $changed = 0;
301            foreach ($lines as $i => $line) {
302                if ($line === '') continue;                 // skip blank lines in-place
303                $fields = explode("\t", $line);
304                if (count($fields) < 2) continue;           // malformed; leave alone
305
306                $ip = $fields[1];
307                if ($ip === self::PLACEHOLDER_IP) continue; // already scrubbed
308                if (trim($ip) === '') continue;             // already blanked (GDPR-style)
309
310                $fields[1] = self::PLACEHOLDER_IP;
311                $lines[$i] = implode("\t", $fields);
312                $changed++;
313            }
314
315            if ($changed === 0) return 0;
316            if (!$mutate) return $changed;
317
318            $newContent = implode("\n", $lines);
319            if ($hadTrailingNewline) $newContent .= "\n";
320
321            $this->atomicWrite($path, $newContent);
322            return $changed;
323        } finally {
324            if ($mutate) io_unlock($path);
325        }
326    }
327
328    /* ----------------------------------------------------------------- *
329     *  Page metadata (.meta) scrubber — PHP serialize format
330     * ----------------------------------------------------------------- */
331
332    /**
333     * Process one .meta file.
334     *
335     * .meta is a serialize()d ['current' => [...], 'persistent' => [...]]
336     * structure (see inc/parserutils.php::p_save_metadata). The IP can live
337     * under last_change.ip in either branch.
338     *
339     * When mutating, io_lock() is held for the full read-modify-write cycle so
340     * concurrent metadata saves (which also use io_lock) are serialized.
341     *
342     * @param string $path
343     * @param bool   $mutate
344     * @return int   number of ip slots changed (0..2 per file)
345     */
346    protected function processMetaFile($path, $mutate)
347    {
348        if ($mutate) io_lock($path);
349        try {
350            $raw = file_get_contents($path);
351            if ($raw === false) throw new RuntimeException('cannot read');
352            if ($raw === '')    return 0;
353
354            $meta = unserialize($raw, ['allowed_classes' => false]);
355            if (!is_array($meta)) return 0;   // corrupt or non-meta - leave alone
356
357            $changed = 0;
358            foreach (['current', 'persistent'] as $branch) {
359                if (
360                    isset($meta[$branch]['last_change']['ip'])
361                    && $meta[$branch]['last_change']['ip'] !== self::PLACEHOLDER_IP
362                ) {
363                    $meta[$branch]['last_change']['ip'] = self::PLACEHOLDER_IP;
364                    $changed++;
365                }
366            }
367
368            if ($changed === 0) return 0;
369            if (!$mutate) return $changed;
370
371            $this->atomicWrite($path, serialize($meta));
372            return $changed;
373        } finally {
374            if ($mutate) io_unlock($path);
375        }
376    }
377
378    /* ----------------------------------------------------------------- *
379     *  Safe write helper
380     * ----------------------------------------------------------------- */
381
382    /**
383     * Write $content to $path atomically, preserving the original mtime.
384     *
385     * The caller must already hold io_lock($path) when mutating to prevent
386     * concurrent writes from being lost by the rename.
387     *
388     * @param string $path
389     * @param string $content
390     * @throws RuntimeException on any unrecoverable failure
391     */
392    protected function atomicWrite($path, $content)
393    {
394        $origMtime = filemtime($path);
395        $tmp = $path . '.hideip_tmp_' . bin2hex(random_bytes(self::TMP_SUFFIX_BYTES));
396
397        $ok = file_put_contents($tmp, $content, LOCK_EX);
398        if ($ok === false) {
399            if (is_file($tmp)) unlink($tmp);
400            throw new RuntimeException('failed to write temp file');
401        }
402
403        // Copy permissions from the original so the rename doesn't change them.
404        $origPerms = fileperms($path);
405        if ($origPerms !== false) chmod($tmp, $origPerms & 0777);
406
407        if (!rename($tmp, $path)) {
408            if (is_file($tmp)) unlink($tmp);
409            throw new RuntimeException('atomic rename failed');
410        }
411
412        if ($origMtime !== false) touch($path, $origMtime);
413    }
414
415    /* ----------------------------------------------------------------- *
416     *  Presentation
417     * ----------------------------------------------------------------- */
418
419    /**
420     * Render the results table for a preview or scrub run.
421     *
422     * @param string  $heading    pre-translated heading string
423     * @param array[] $results    [section_label => [files, lines, errors]]
424     * @param bool    $wasScrub
425     * @return void
426     */
427    protected function renderResults($heading, array $results, $wasScrub)
428    {
429        echo '<h2>' . hsc($heading) . '</h2>';
430
431        $totalFiles  = 0;
432        $totalLines  = 0;
433        $totalErrors = 0;
434        foreach ($results as $stats) {
435            $totalFiles  += $stats['files'];
436            $totalLines  += $stats['lines'];
437            $totalErrors += count($stats['errors']);
438        }
439
440        if ($wasScrub) {
441            echo '<p>' . sprintf($this->getLang('done_summary'), $totalLines, $totalFiles) . '</p>';
442        } else {
443            echo '<p>' . sprintf($this->getLang('preview_summary'), $totalLines, $totalFiles) . '</p>';
444        }
445
446        $colSlots = $wasScrub
447            ? $this->getLang('col_slots_rewritten')
448            : $this->getLang('col_slots_pending');
449
450        echo '<table class="inline"><thead><tr>'
451            . '<th>' . hsc($this->getLang('col_section')) . '</th>'
452            . '<th>' . hsc($this->getLang('col_files')) . '</th>'
453            . '<th>' . hsc($colSlots) . '</th>'
454            . '<th>' . hsc($this->getLang('col_errors')) . '</th>'
455            . '</tr></thead><tbody>';
456        foreach ($results as $label => $stats) {
457            echo '<tr>'
458                . '<td>' . hsc($label) . '</td>'
459                . '<td style="text-align:right;">' . (int)$stats['files'] . '</td>'
460                . '<td style="text-align:right;">' . (int)$stats['lines'] . '</td>'
461                . '<td style="text-align:right;">' . count($stats['errors']) . '</td>'
462                . '</tr>';
463        }
464        echo '</tbody></table>';
465
466        if ($totalErrors > 0) {
467            echo '<h3>' . hsc($this->getLang('errors_heading')) . '</h3><ul>';
468            foreach ($results as $stats) {
469                foreach ($stats['errors'] as $err) {
470                    echo '<li><code>' . hsc($err) . '</code></li>';
471                }
472            }
473            echo '</ul>';
474        }
475    }
476}
477