xref: /plugin/annotations/helper.php (revision 86c7806d6d41bce7c6d00acbee1316c62845cabb)
1<?php
2
3/**
4 * Annotations plugin — storage and data-logic helper.
5 *
6 * This component owns:
7 *
8 *   1. The per-page annotation store. One JSON file per page, obtained via
9 *      metaFN($id, '.annotations'), holding {version, annotations:[...]}.
10 *      JSON and pretty-printed so the files are easy to inspect or back up.
11 *      The page text and the wiki changelog are never touched.
12 *
13 *   2. The text-quote anchor model. Each annotation stores an anchor of
14 *      {exact, prefix, suffix, start} — the quoted text, a short slice of the
15 *      surrounding context on each side (to disambiguate repeated quotes),
16 *      and a character-offset hint. This is the Hypothes.is approach.
17 *
18 *   3. CRUD on annotations and their threaded replies.
19 *
20 *   4. Server-side orphan detection: a page is rendered to plain text and an
21 *      annotation is "orphaned" when its quoted text no longer appears. Used
22 *      by the admin-only per-page "clear orphaned" operation. (The live UI
23 *      also detects orphans client-side for the on-page counter.)
24 *
25 *   5. The permission rules, as the single source of truth. They are pure
26 *      functions: the caller gathers the facts (current user, admin flag, the
27 *      page's ACL level) and passes them in. Because annotations live
28 *      out-of-band, creating one needs only AUTH_READ on the page, never
29 *      AUTH_EDIT — so a group whose page edit access is blocked can still
30 *      annotate.
31 */
32
33// must be run within DokuWiki
34if (!defined('DOKU_INC')) die();
35
36class helper_plugin_annotations extends DokuWiki_Plugin
37{
    /** Storage schema version; written as the "version" field of every annotation file by writeFile(). */
    const SCHEMA_VERSION = 1;

    /** Longest quoted selection stored, in characters; cleanAnchor() truncates longer quotes to this length. */
    const MAX_QUOTE = 1000;

    /** Fallback length of each prefix/suffix context slice, in characters; see contextLength(). */
    const DEFAULT_CONTEXT = 64;

    /** Fallback maximum annotation/reply body length, in characters; see bodyCap(). */
    const DEFAULT_BODY = 10000;
49
50    /**
51     * Configured length of each prefix/suffix context slice, in characters.
52     *
53     * @return int
54     */
55    protected function contextLength()
56    {
57        $v = (int) $this->getConf('context_length');
58        return $v >= 0 ? $v : self::DEFAULT_CONTEXT;
59    }
60
61    /**
62     * Configured maximum annotation/reply body length, in characters.
63     *
64     * @return int
65     */
66    protected function bodyCap()
67    {
68        $v = (int) $this->getConf('body_cap');
69        return $v > 0 ? $v : self::DEFAULT_BODY;
70    }
71
72    // ---------------------------------------------------------------------
73    //  Storage
74    // ---------------------------------------------------------------------
75
76    /**
77     * Path of a page's annotation file.
78     *
79     * @param string $id page id
80     * @return string
81     */
82    protected function getFile($id)
83    {
84        return metaFN($id, '.annotations');
85    }
86
87    /**
88     * All annotations stored for a page.
89     *
90     * @param string $id page id
91     * @return array list of annotation arrays (empty if none)
92     */
93    public function getAnnotations($id)
94    {
95        $file = $this->getFile($id);
96        if (!file_exists($file)) {
97            return [];
98        }
99        $raw = io_readFile($file, false);
100        if ($raw === '') {
101            return [];
102        }
103        $data = json_decode($raw, true);
104        if (!is_array($data) || !isset($data['annotations']) || !is_array($data['annotations'])) {
105            return [];
106        }
107        return $data['annotations'];
108    }
109
110    /**
111     * A single annotation by id.
112     *
113     * @param string $id    page id
114     * @param string $annId annotation id
115     * @return array|null
116     */
117    public function getAnnotation($id, $annId)
118    {
119        foreach ($this->getAnnotations($id) as $a) {
120            if (($a['id'] ?? '') === $annId) {
121                return $a;
122            }
123        }
124        return null;
125    }
126
127    /**
128     * Counts for the on-page indicator. The orphan count is deliberately not
129     * here — it depends on the rendered page and is computed client-side.
130     *
131     * @param string $id page id
132     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
133     */
134    public function getStats($id)
135    {
136        return $this->statsFor($this->getAnnotations($id));
137    }
138
139    /**
140     * Counts for the on-page indicator, computed from an already-loaded list.
141     * Split out from getStats() so callers that already hold the annotation
142     * array (e.g. the page-load JSINFO injector, which embeds the same list)
143     * don't re-read the file.
144     *
145     * @param array $annotations annotation list
146     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
147     */
148    public function statsFor(array $annotations)
149    {
150        $open = 0;
151        $resolved = 0;
152        foreach ($annotations as $a) {
153            if (($a['status'] ?? 'open') === 'resolved') {
154                $resolved++;
155            } else {
156                $open++;
157            }
158        }
159        return ['total' => $open + $resolved, 'open' => $open, 'resolved' => $resolved];
160    }
161
162    /**
163     * Write a page's annotation list to disk.
164     *
165     * @param string $id   page id
166     * @param array  $list annotations
167     * @return bool
168     */
169    protected function writeFile($id, array $list)
170    {
171        $payload = [
172            'version'     => self::SCHEMA_VERSION,
173            'annotations' => array_values($list),
174        ];
175        return (bool) io_saveFile(
176            $this->getFile($id),
177            json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES)
178        );
179    }
180
181    /**
182     * Run a modification against a page's annotations under a write lock.
183     *
184     * The modifier receives the annotation list by reference and returns an
185     * outcome value. Returning the boolean false aborts the write (used for
186     * "target not found"); any other value is returned to the caller after a
187     * successful save.
188     *
189     * @param string   $id       page id
190     * @param callable $modifier function(array &$annotations): mixed
191     * @return mixed  the modifier's outcome on success, or false on failure
192     */
193    protected function mutate($id, callable $modifier)
194    {
195        $file = $this->getFile($id);
196        io_lock($file);
197
198        $annotations = $this->getAnnotations($id);
199        $outcome = $modifier($annotations);
200
201        if ($outcome === false) {
202            io_unlock($file);
203            return false;
204        }
205
206        $ok = $this->writeFile($id, $annotations);
207        io_unlock($file);
208        return $ok ? $outcome : false;
209    }
210
211    // ---------------------------------------------------------------------
212    //  Annotation CRUD
213    // ---------------------------------------------------------------------
214
215    /**
216     * Create an annotation.
217     *
218     * @param string $id     page id
219     * @param array  $anchor raw anchor {exact, prefix, suffix, start}
220     * @param string $author username
221     * @param string $body   annotation text
222     * @return array|false  the created annotation, or false on invalid input
223     */
224    public function createAnnotation($id, $anchor, $author, $body)
225    {
226        if ($id === '' || $author === '' || $author === null) {
227            return false;
228        }
229        $body = $this->cleanBody($body);
230        if ($body === '') {
231            return false;
232        }
233        $anchor = $this->cleanAnchor($anchor);
234        if ($anchor === null) {
235            return false;
236        }
237
238        $now = time();
239        $new = [
240            'id'          => $this->newId(),
241            'anchor'      => $anchor,
242            'author'      => $author,
243            'created'     => $now,
244            'modified'    => $now,
245            'body'        => $body,
246            'status'      => 'open',
247            'resolved_by' => '',
248            'resolved_at' => 0,
249            'replies'     => [],
250        ];
251
252        return $this->mutate($id, function (array &$annotations) use ($new) {
253            $annotations[] = $new;
254            return $new;
255        });
256    }
257
258    /**
259     * Edit an annotation's body text.
260     *
261     * @param string $id    page id
262     * @param string $annId annotation id
263     * @param string $body  new text
264     * @return bool
265     */
266    public function updateAnnotationBody($id, $annId, $body)
267    {
268        $body = $this->cleanBody($body);
269        if ($body === '') {
270            return false;
271        }
272        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $body) {
273            foreach ($annotations as $i => $a) {
274                if (($a['id'] ?? '') === $annId) {
275                    $annotations[$i]['body']     = $body;
276                    $annotations[$i]['modified'] = time();
277                    return true;
278                }
279            }
280            return false;
281        });
282    }
283
284    /**
285     * Delete an annotation and all its replies.
286     *
287     * @param string $id    page id
288     * @param string $annId annotation id
289     * @return bool
290     */
291    public function deleteAnnotation($id, $annId)
292    {
293        return (bool) $this->mutate($id, function (array &$annotations) use ($annId) {
294            foreach ($annotations as $i => $a) {
295                if (($a['id'] ?? '') === $annId) {
296                    array_splice($annotations, $i, 1);
297                    return true;
298                }
299            }
300            return false;
301        });
302    }
303
304    /**
305     * Mark an annotation open or resolved.
306     *
307     * @param string $id     page id
308     * @param string $annId  annotation id
309     * @param string $status 'open' or 'resolved'
310     * @param string $actor  username making the change (recorded when resolving)
311     * @return bool
312     */
313    public function setStatus($id, $annId, $status, $actor)
314    {
315        if (!in_array($status, ['open', 'resolved'], true)) {
316            return false;
317        }
318        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $status, $actor) {
319            foreach ($annotations as $i => $a) {
320                if (($a['id'] ?? '') === $annId) {
321                    $annotations[$i]['status'] = $status;
322                    if ($status === 'resolved') {
323                        $annotations[$i]['resolved_by'] = $actor;
324                        $annotations[$i]['resolved_at'] = time();
325                    } else {
326                        $annotations[$i]['resolved_by'] = '';
327                        $annotations[$i]['resolved_at'] = 0;
328                    }
329                    return true;
330                }
331            }
332            return false;
333        });
334    }
335
336    // ---------------------------------------------------------------------
337    //  Reply CRUD
338    // ---------------------------------------------------------------------
339
340    /**
341     * Add a reply to an annotation.
342     *
343     * @param string $id       page id
344     * @param string $annId    annotation id
345     * @param string $author   username
346     * @param string $body     reply text
347     * @param string $parentId id of the reply being replied to, or '' for root-level
348     * @return array|false  the created reply, or false on invalid input
349     */
350    public function addReply($id, $annId, $author, $body, $parentId = '')
351    {
352        if ($author === '' || $author === null) {
353            return false;
354        }
355        $body = $this->cleanBody($body);
356        if ($body === '') {
357            return false;
358        }
359        $now = time();
360        $reply = [
361            'id'       => $this->newId(),
362            'parentId' => preg_replace('/[^a-f0-9]/', '', (string) $parentId),
363            'author'   => $author,
364            'created'  => $now,
365            'modified' => $now,
366            'body'     => $body,
367        ];
368
369        return $this->mutate($id, function (array &$annotations) use ($annId, $reply) {
370            foreach ($annotations as $i => $a) {
371                if (($a['id'] ?? '') === $annId) {
372                    $annotations[$i]['replies'][] = $reply;
373                    return $reply;
374                }
375            }
376            return false;
377        });
378    }
379
380    /**
381     * Edit a reply's body text.
382     *
383     * @param string $id      page id
384     * @param string $annId   annotation id
385     * @param string $replyId reply id
386     * @param string $body    new text
387     * @return bool
388     */
389    public function updateReply($id, $annId, $replyId, $body)
390    {
391        $body = $this->cleanBody($body);
392        if ($body === '') {
393            return false;
394        }
395        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId, $body) {
396            foreach ($annotations as $i => $a) {
397                if (($a['id'] ?? '') !== $annId) {
398                    continue;
399                }
400                foreach (($a['replies'] ?? []) as $j => $r) {
401                    if (($r['id'] ?? '') === $replyId) {
402                        $annotations[$i]['replies'][$j]['body']     = $body;
403                        $annotations[$i]['replies'][$j]['modified'] = time();
404                        return true;
405                    }
406                }
407            }
408            return false;
409        });
410    }
411
412    /**
413     * Delete a reply.
414     *
415     * @param string $id      page id
416     * @param string $annId   annotation id
417     * @param string $replyId reply id
418     * @return bool
419     */
420    public function deleteReply($id, $annId, $replyId)
421    {
422        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId) {
423            foreach ($annotations as $i => $a) {
424                if (($a['id'] ?? '') !== $annId) {
425                    continue;
426                }
427                foreach (($a['replies'] ?? []) as $j => $r) {
428                    if (($r['id'] ?? '') === $replyId) {
429                        array_splice($annotations[$i]['replies'], $j, 1);
430                        return true;
431                    }
432                }
433            }
434            return false;
435        });
436    }
437
438    // ---------------------------------------------------------------------
439    //  Bulk maintenance (admin, per page)
440    // ---------------------------------------------------------------------
441
442    /**
443     * Remove every resolved annotation from a page.
444     *
445     * @param string $id page id
446     * @return int|false number removed, or false on write failure
447     */
448    public function clearResolved($id)
449    {
450        if (empty($this->getAnnotations($id))) {
451            return 0;
452        }
453        return $this->mutate($id, function (array &$annotations) {
454            $before = count($annotations);
455            $annotations = array_values(array_filter($annotations, function ($a) {
456                return ($a['status'] ?? 'open') !== 'resolved';
457            }));
458            return $before - count($annotations);
459        });
460    }
461
462    /**
463     * Remove every orphaned annotation from a page — those whose quoted text
464     * no longer appears in the rendered page. The page is re-checked here, so
465     * this is authoritative regardless of what a client believed.
466     *
467     * @param string $id page id
468     * @return int|false number removed, or false on write failure
469     */
470    public function clearOrphaned($id)
471    {
472        $orphanIds = [];
473        foreach ($this->findOrphaned($id) as $a) {
474            $orphanIds[] = $a['id'];
475        }
476        if (empty($orphanIds)) {
477            return 0;
478        }
479        return $this->mutate($id, function (array &$annotations) use ($orphanIds) {
480            $before = count($annotations);
481            $annotations = array_values(array_filter($annotations, function ($a) use ($orphanIds) {
482                return !in_array($a['id'] ?? '', $orphanIds, true);
483            }));
484            return $before - count($annotations);
485        });
486    }
487
488    // ---------------------------------------------------------------------
489    //  Orphan detection
490    // ---------------------------------------------------------------------
491
492    /**
493     * Render a page to normalised plain text, for quote searching.
494     *
495     * Block-level closing tags become spaces so adjacent blocks do not fuse
496     * into one run of text; then tags are stripped, entities decoded, and
497     * whitespace collapsed — the same normalisation applied to stored quotes.
498     *
499     * @param string $id page id
500     * @return string
501     */
502    public function getPageText($id)
503    {
504        if (!page_exists($id)) {
505            return '';
506        }
507        $xhtml = p_wiki_xhtml($id, '', false);
508        if (!is_string($xhtml) || $xhtml === '') {
509            return '';
510        }
511        $xhtml = preg_replace('#</(p|div|li|h[1-6]|td|th|tr|blockquote|pre|dt|dd)>#i', ' ', $xhtml);
512        $xhtml = preg_replace('#<br\s*/?>#i', ' ', $xhtml);
513        $text  = strip_tags($xhtml);
514        $text  = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
515        return $this->normalizeWhitespace($text);
516    }
517
518    /**
519     * The annotations on a page whose quoted text is no longer present.
520     *
521     * @param string $id page id
522     * @return array list of orphaned annotation arrays
523     */
524    public function findOrphaned($id)
525    {
526        $annotations = $this->getAnnotations($id);
527        if (empty($annotations)) {
528            return [];
529        }
530        $pageText = $this->getPageText($id);
531
532        $orphaned = [];
533        foreach ($annotations as $a) {
534            $exact = $this->normalizeWhitespace($a['anchor']['exact'] ?? '');
535            if ($exact === '' || mb_strpos($pageText, $exact) === false) {
536                $orphaned[] = $a;
537            }
538        }
539        return $orphaned;
540    }
541
542    // ---------------------------------------------------------------------
543    //  Permission rules (single source of truth)
544    // ---------------------------------------------------------------------
545
546    /**
547     * May this user create an annotation, reply, or change a resolve status?
548     *
549     * Requires only read access to the page — annotations are out-of-band, so
550     * a user whose page edit access is blocked may still annotate.
551     *
552     * @param string $user     current username ('' for anonymous)
553     * @param int    $aclLevel the user's ACL level on the page
554     * @return bool
555     */
556    public function canAnnotate($user, $aclLevel)
557    {
558        return $user !== '' && $user !== null && $aclLevel >= AUTH_READ;
559    }
560
561    /**
562     * May this user edit or delete the given annotation? Author or admin.
563     *
564     * @param array  $annotation
565     * @param string $user
566     * @param bool   $isAdmin
567     * @return bool
568     */
569    public function canEditAnnotation(array $annotation, $user, $isAdmin)
570    {
571        if ($user === '' || $user === null) {
572            return false;
573        }
574        return $isAdmin || (($annotation['author'] ?? '') === $user);
575    }
576
577    /**
578     * May this user edit or delete the given reply? Author or admin.
579     *
580     * @param array  $reply
581     * @param string $user
582     * @param bool   $isAdmin
583     * @return bool
584     */
585    public function canEditReply(array $reply, $user, $isAdmin)
586    {
587        if ($user === '' || $user === null) {
588            return false;
589        }
590        return $isAdmin || (($reply['author'] ?? '') === $user);
591    }
592
593    /**
594     * May this user run the per-page "clear resolved/orphaned" operations?
595     * Admins only.
596     *
597     * @param bool $isAdmin
598     * @return bool
599     */
600    public function canClear($isAdmin)
601    {
602        return (bool) $isAdmin;
603    }
604
605    // ---------------------------------------------------------------------
606    //  Input cleaning
607    // ---------------------------------------------------------------------
608
609    /**
610     * Validate and normalise a raw anchor.
611     *
612     * @param mixed $anchor
613     * @return array|null  the cleaned anchor, or null if unusable
614     */
615    protected function cleanAnchor($anchor)
616    {
617        if (!is_array($anchor)) {
618            return null;
619        }
620
621        $exact = (isset($anchor['exact']) && is_string($anchor['exact']))
622            ? $this->normalizeWhitespace($anchor['exact'])
623            : '';
624        if ($exact === '') {
625            return null; // an anchor without quoted text is unusable
626        }
627        if (mb_strlen($exact) > self::MAX_QUOTE) {
628            $exact = mb_substr($exact, 0, self::MAX_QUOTE);
629        }
630
631        $prefix = (isset($anchor['prefix']) && is_string($anchor['prefix']))
632            ? $this->normalizeWhitespace($anchor['prefix'])
633            : '';
634        $suffix = (isset($anchor['suffix']) && is_string($anchor['suffix']))
635            ? $this->normalizeWhitespace($anchor['suffix'])
636            : '';
637        $ctx = $this->contextLength();
638        if (mb_strlen($prefix) > $ctx) {
639            $prefix = mb_substr($prefix, -$ctx);
640        }
641        if (mb_strlen($suffix) > $ctx) {
642            $suffix = mb_substr($suffix, 0, $ctx);
643        }
644
645        $start = isset($anchor['start']) ? max(0, (int) $anchor['start']) : 0;
646
647        return [
648            'exact'  => $exact,
649            'prefix' => $prefix,
650            'suffix' => $suffix,
651            'start'  => $start,
652        ];
653    }
654
655    /**
656     * Clean an annotation/reply body: a plain-text string, trimmed, with
657     * normalised line endings and a length cap. Newlines are kept; the text
658     * is escaped by the consumer at render time.
659     *
660     * @param mixed $body
661     * @return string
662     */
663    protected function cleanBody($body)
664    {
665        if (!is_string($body)) {
666            return '';
667        }
668        $body = str_replace("\r\n", "\n", $body);
669        $body = str_replace("\r", "\n", $body);
670        $body = trim($body);
671        $cap = $this->bodyCap();
672        if (mb_strlen($body) > $cap) {
673            $body = mb_substr($body, 0, $cap);
674        }
675        return $body;
676    }
677
678    /**
679     * Collapse every run of whitespace to a single space and trim.
680     *
681     * @param mixed $text
682     * @return string
683     */
684    protected function normalizeWhitespace($text)
685    {
686        return trim(preg_replace('/\s+/u', ' ', (string) $text));
687    }
688
689    /**
690     * A fresh identifier for an annotation or reply.
691     *
692     * @return string 16 hex characters
693     */
694    protected function newId()
695    {
696        return bin2hex(random_bytes(8));
697    }
698}
699