xref: /plugin/annotations/helper.php (revision da56206cc13612db0df36be97c0f01d8f3c5e9f4)
1<?php
2
3/**
4 * Annotations plugin — storage and data-logic helper.
5 *
6 * This component owns:
7 *
8 *   1. The per-page annotation store. One JSON file per page, obtained via
9 *      metaFN($id, '.annotations'), holding {version, annotations:[...]}.
10 *      JSON and pretty-printed so the files are easy to inspect or back up.
11 *      The page text and the wiki changelog are never touched.
12 *
13 *   2. The text-quote anchor model. Each annotation stores an anchor of
14 *      {exact, prefix, suffix, start} — the quoted text, a short slice of the
15 *      surrounding context on each side (to disambiguate repeated quotes),
16 *      and a character-offset hint. This is the Hypothes.is approach.
17 *
18 *   3. CRUD on annotations and their threaded replies.
19 *
20 *   4. Server-side orphan detection: a page is rendered to plain text and an
21 *      annotation is "orphaned" when its quoted text no longer appears. Used
22 *      by the admin-only per-page "clear orphaned" operation. (The live UI
23 *      also detects orphans client-side for the on-page counter.)
24 *
25 *   5. The permission rules, as the single source of truth. They are pure
26 *      functions: the caller gathers the facts (current user, admin flag, the
27 *      page's ACL level) and passes them in. Because annotations live
28 *      out-of-band, creating one needs only AUTH_READ on the page, never
29 *      AUTH_EDIT — so a group whose page edit access is blocked can still
30 *      annotate.
31 */
32
// Refuse to run when loaded directly: DOKU_INC must already be defined,
// i.e. this file must be included from within DokuWiki.
if (!defined('DOKU_INC')) {
    die();
}
35
/**
 * Storage and data-logic helper of the annotations plugin.
 *
 * Owns the per-page JSON annotation store, CRUD on annotations and their
 * threaded replies, server-side orphan detection, the permission rules and
 * the cleaning of caller-supplied input.
 */
class helper_plugin_annotations extends DokuWiki_Plugin
{
    /** storage schema version, written into each file */
    const SCHEMA_VERSION = 1;

    /** longest quoted selection stored, in characters */
    const MAX_QUOTE = 1000;

    /** length of the prefix/suffix context slices, in characters */
    const MAX_CONTEXT = 64;

    /** longest annotation/reply body, in characters */
    const MAX_BODY = 10000;

    // ---------------------------------------------------------------------
    //  Storage
    // ---------------------------------------------------------------------

    /**
     * Path of a page's annotation file.
     *
     * @param string $id page id
     * @return string
     */
    protected function getFile($id)
    {
        return metaFN($id, '.annotations');
    }

    /**
     * Name under which writers of a page's annotation file serialise.
     *
     * Deliberately NOT the data file's own path.
     * NOTE(review): DokuWiki's io_saveFile() is understood to take io_lock()
     * on the path it writes; holding that same lock around the call would
     * make the inner lock wait for its stale-lock timeout on every write.
     * Confirm against inc/io.php. A dedicated key is correct either way.
     *
     * @param string $id page id
     * @return string
     */
    protected function getLockKey($id)
    {
        return $this->getFile($id) . '.lock';
    }

    /**
     * Read and validate a page's annotation store.
     *
     * Unlike getAnnotations(), this distinguishes "nothing stored" from
     * "something is stored but cannot be used", so that writers can refuse
     * to replace a damaged file with a fresh one.
     *
     * @param string $id page id
     * @return array|null list of annotation arrays (empty if none), or null
     *                    when a store exists but cannot be read or parsed
     */
    protected function readStore($id)
    {
        $file = $this->getFile($id);
        if (!file_exists($file)) {
            return [];
        }
        $raw = io_readFile($file, false);
        if (!is_string($raw)) {
            return null; // anything but a string is treated as a read failure
        }
        if (trim($raw) === '') {
            return [];
        }
        $data = json_decode($raw, true);
        if (!is_array($data) || !isset($data['annotations']) || !is_array($data['annotations'])) {
            return null;
        }
        // Every consumer indexes entries as arrays and relies on positional
        // keys, so drop malformed entries and re-index.
        return array_values(array_filter($data['annotations'], 'is_array'));
    }

    /**
     * All annotations stored for a page.
     *
     * A missing, empty or unparsable store all read as "no annotations".
     *
     * @param string $id page id
     * @return array list of annotation arrays (empty if none)
     */
    public function getAnnotations($id)
    {
        $list = $this->readStore($id);
        return $list === null ? [] : $list;
    }

    /**
     * A single annotation by id.
     *
     * @param string $id    page id
     * @param string $annId annotation id
     * @return array|null
     */
    public function getAnnotation($id, $annId)
    {
        foreach ($this->getAnnotations($id) as $a) {
            if (($a['id'] ?? '') === $annId) {
                return $a;
            }
        }
        return null;
    }

    /**
     * Counts for the on-page indicator. The orphan count is deliberately not
     * here — it depends on the rendered page and is computed client-side.
     *
     * Any status other than 'resolved' (including a missing one) counts as
     * open.
     *
     * @param string $id page id
     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
     */
    public function getStats($id)
    {
        $open = 0;
        $resolved = 0;
        foreach ($this->getAnnotations($id) as $a) {
            if (($a['status'] ?? 'open') === 'resolved') {
                $resolved++;
            } else {
                $open++;
            }
        }
        return ['total' => $open + $resolved, 'open' => $open, 'resolved' => $resolved];
    }

    /**
     * Write a page's annotation list to disk.
     *
     * @param string $id   page id
     * @param array  $list annotations
     * @return bool false when the list cannot be encoded or the save fails
     */
    protected function writeFile($id, array $list)
    {
        $payload = [
            'version'     => self::SCHEMA_VERSION,
            'annotations' => array_values($list),
        ];
        $json = json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
        if ($json === false) {
            // e.g. invalid UTF-8 in a body. Saving the boolean would replace
            // the whole store with empty content, so refuse instead.
            return false;
        }
        return (bool) io_saveFile($this->getFile($id), $json);
    }

    /**
     * Run a modification against a page's annotations under a write lock.
     *
     * The modifier receives the annotation list by reference and returns an
     * outcome value. Returning the boolean false aborts the write (used for
     * "target not found"); any other value is returned to the caller after a
     * successful save.
     *
     * The write is also refused when the existing store cannot be read or
     * parsed, so a damaged file is never silently replaced. The lock is
     * released even when the modifier throws; the exception then propagates.
     *
     * @param string   $id       page id
     * @param callable $modifier function(array &$annotations): mixed
     * @return mixed  the modifier's outcome on success, or false on failure
     */
    protected function mutate($id, callable $modifier)
    {
        $lock = $this->getLockKey($id);
        io_lock($lock);
        try {
            $annotations = $this->readStore($id);
            if ($annotations === null) {
                return false;
            }

            $outcome = $modifier($annotations);
            if ($outcome === false) {
                return false;
            }

            return $this->writeFile($id, $annotations) ? $outcome : false;
        } finally {
            io_unlock($lock);
        }
    }

    // ---------------------------------------------------------------------
    //  Annotation CRUD
    // ---------------------------------------------------------------------

    /**
     * Create an annotation.
     *
     * @param string $id     page id
     * @param array  $anchor raw anchor {exact, prefix, suffix, start}
     * @param string $author username
     * @param string $body   annotation text
     * @return array|false  the created annotation, or false on invalid input
     *                      or write failure
     */
    public function createAnnotation($id, $anchor, $author, $body)
    {
        if ($id === '' || $author === '' || $author === null) {
            return false;
        }
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        $anchor = $this->cleanAnchor($anchor);
        if ($anchor === null) {
            return false;
        }

        $now = time();
        $new = [
            'id'          => $this->newId(),
            'anchor'      => $anchor,
            'author'      => $author,
            'created'     => $now,
            'modified'    => $now,
            'body'        => $body,
            'status'      => 'open',
            'resolved_by' => '',
            'resolved_at' => 0,
            'replies'     => [],
        ];

        return $this->mutate($id, function (array &$annotations) use ($new) {
            $annotations[] = $new;
            return $new;
        });
    }

    /**
     * Edit an annotation's body text.
     *
     * @param string $id    page id
     * @param string $annId annotation id
     * @param string $body  new text
     * @return bool false on empty body, unknown annotation or write failure
     */
    public function updateAnnotationBody($id, $annId, $body)
    {
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $body) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    $annotations[$i]['body']     = $body;
                    $annotations[$i]['modified'] = time();
                    return true;
                }
            }
            return false;
        });
    }

    /**
     * Delete an annotation and all its replies.
     *
     * @param string $id    page id
     * @param string $annId annotation id
     * @return bool false on unknown annotation or write failure
     */
    public function deleteAnnotation($id, $annId)
    {
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    // writeFile() re-indexes the list before saving
                    unset($annotations[$i]);
                    return true;
                }
            }
            return false;
        });
    }

    /**
     * Mark an annotation open or resolved.
     *
     * Resolving records who did it and when; reopening clears both fields.
     *
     * @param string $id     page id
     * @param string $annId  annotation id
     * @param string $status 'open' or 'resolved'
     * @param string $actor  username making the change (recorded when resolving)
     * @return bool false on invalid status, unknown annotation or write failure
     */
    public function setStatus($id, $annId, $status, $actor)
    {
        if (!in_array($status, ['open', 'resolved'], true)) {
            return false;
        }
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $status, $actor) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    $resolving = ($status === 'resolved');
                    $annotations[$i]['status']      = $status;
                    $annotations[$i]['resolved_by'] = $resolving ? $actor : '';
                    $annotations[$i]['resolved_at'] = $resolving ? time() : 0;
                    return true;
                }
            }
            return false;
        });
    }

    // ---------------------------------------------------------------------
    //  Reply CRUD
    // ---------------------------------------------------------------------

    /**
     * Add a reply to an annotation.
     *
     * @param string $id     page id
     * @param string $annId  annotation id
     * @param string $author username
     * @param string $body   reply text
     * @return array|false  the created reply, or false on invalid input,
     *                      unknown annotation or write failure
     */
    public function addReply($id, $annId, $author, $body)
    {
        if ($author === '' || $author === null) {
            return false;
        }
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        $now = time();
        $reply = [
            'id'       => $this->newId(),
            'author'   => $author,
            'created'  => $now,
            'modified' => $now,
            'body'     => $body,
        ];

        return $this->mutate($id, function (array &$annotations) use ($annId, $reply) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    // a missing or malformed reply list starts over as empty
                    if (!isset($a['replies']) || !is_array($a['replies'])) {
                        $annotations[$i]['replies'] = [];
                    }
                    $annotations[$i]['replies'][] = $reply;
                    return $reply;
                }
            }
            return false;
        });
    }

    /**
     * Edit a reply's body text.
     *
     * @param string $id      page id
     * @param string $annId   annotation id
     * @param string $replyId reply id
     * @param string $body    new text
     * @return bool false on empty body, unknown target or write failure
     */
    public function updateReply($id, $annId, $replyId, $body)
    {
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId, $body) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') !== $annId) {
                    continue;
                }
                $replies = $a['replies'] ?? [];
                if (!is_array($replies)) {
                    continue;
                }
                foreach ($replies as $j => $r) {
                    if (($r['id'] ?? '') === $replyId) {
                        $annotations[$i]['replies'][$j]['body']     = $body;
                        $annotations[$i]['replies'][$j]['modified'] = time();
                        return true;
                    }
                }
            }
            return false;
        });
    }

    /**
     * Delete a reply.
     *
     * @param string $id      page id
     * @param string $annId   annotation id
     * @param string $replyId reply id
     * @return bool false on unknown target or write failure
     */
    public function deleteReply($id, $annId, $replyId)
    {
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') !== $annId) {
                    continue;
                }
                $replies = $a['replies'] ?? [];
                if (!is_array($replies)) {
                    continue;
                }
                foreach ($replies as $j => $r) {
                    if (($r['id'] ?? '') === $replyId) {
                        // remove by key, then re-index: this does not depend
                        // on the stored reply list having sequential keys
                        unset($replies[$j]);
                        $annotations[$i]['replies'] = array_values($replies);
                        return true;
                    }
                }
            }
            return false;
        });
    }

    // ---------------------------------------------------------------------
    //  Bulk maintenance (admin, per page)
    // ---------------------------------------------------------------------

    /**
     * Remove every resolved annotation from a page.
     *
     * @param string $id page id
     * @return int|false number removed, or false on write failure
     */
    public function clearResolved($id)
    {
        // Nothing to remove: skip the lock and the rewrite altogether.
        $stats = $this->getStats($id);
        if ($stats['resolved'] === 0) {
            return 0;
        }
        return $this->mutate($id, function (array &$annotations) {
            $before = count($annotations);
            $annotations = array_values(array_filter($annotations, function ($a) {
                return ($a['status'] ?? 'open') !== 'resolved';
            }));
            return $before - count($annotations);
        });
    }

    /**
     * Remove every orphaned annotation from a page — those whose quoted text
     * no longer appears in the rendered page. The page is re-checked here, so
     * this is authoritative regardless of what a client believed.
     *
     * The page is rendered once; the same text is then used both for the
     * cheap "anything to do?" check and for the filtering done under the
     * write lock. Filtering is by the orphan test itself rather than by id,
     * so entries without an id are judged individually.
     *
     * @param string $id page id
     * @return int|false number removed, or false on write failure
     */
    public function clearOrphaned($id)
    {
        $current = $this->getAnnotations($id);
        if (empty($current)) {
            return 0;
        }
        $pageText = $this->getPageText($id);

        $found = false;
        foreach ($current as $a) {
            if ($this->isOrphaned($a, $pageText)) {
                $found = true;
                break;
            }
        }
        if (!$found) {
            return 0;
        }

        return $this->mutate($id, function (array &$annotations) use ($pageText) {
            $before = count($annotations);
            $annotations = array_values(array_filter($annotations, function ($a) use ($pageText) {
                return !$this->isOrphaned($a, $pageText);
            }));
            return $before - count($annotations);
        });
    }

    // ---------------------------------------------------------------------
    //  Orphan detection
    // ---------------------------------------------------------------------

    /**
     * Render a page to normalised plain text, for quote searching.
     *
     * Block-level closing tags become spaces so adjacent blocks do not fuse
     * into one run of text; then tags are stripped, entities decoded, and
     * whitespace collapsed — the same normalisation applied to stored quotes.
     *
     * @param string $id page id
     * @return string '' when the page does not exist or renders to nothing
     */
    public function getPageText($id)
    {
        if (!page_exists($id)) {
            return '';
        }
        $xhtml = p_wiki_xhtml($id, '', false);
        if (!is_string($xhtml) || $xhtml === '') {
            return '';
        }
        $spaced = preg_replace(
            [
                '#</(p|div|li|h[1-6]|td|th|tr|blockquote|pre|dt|dd)>#i',
                '#<br\s*/?>#i',
            ],
            ' ',
            $xhtml
        );
        if (is_string($spaced)) {
            // preg_replace() yields null on a regex engine error; in that
            // case carry on with the unmodified markup
            $xhtml = $spaced;
        }
        $text = strip_tags($xhtml);
        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        return $this->normalizeWhitespace($text);
    }

    /**
     * Is this annotation's quoted text absent from the given page text?
     *
     * An annotation with no usable quote at all counts as orphaned.
     *
     * @param array  $annotation annotation as stored
     * @param string $pageText   page text as returned by getPageText()
     * @return bool
     */
    protected function isOrphaned(array $annotation, $pageText)
    {
        $exact = $annotation['anchor']['exact'] ?? '';
        if (!is_scalar($exact)) {
            return true;
        }
        $exact = $this->normalizeWhitespace($exact);
        return $exact === '' || mb_strpos($pageText, $exact) === false;
    }

    /**
     * The annotations on a page whose quoted text is no longer present.
     *
     * @param string $id page id
     * @return array list of orphaned annotation arrays
     */
    public function findOrphaned($id)
    {
        $annotations = $this->getAnnotations($id);
        if (empty($annotations)) {
            return [];
        }
        $pageText = $this->getPageText($id);

        $orphaned = [];
        foreach ($annotations as $a) {
            if ($this->isOrphaned($a, $pageText)) {
                $orphaned[] = $a;
            }
        }
        return $orphaned;
    }

    // ---------------------------------------------------------------------
    //  Permission rules (single source of truth)
    // ---------------------------------------------------------------------

    /**
     * May this user create an annotation, reply, or change a resolve status?
     *
     * Requires only read access to the page — annotations are out-of-band, so
     * a user whose page edit access is blocked may still annotate.
     *
     * @param string $user     current username ('' for anonymous)
     * @param int    $aclLevel the user's ACL level on the page
     * @return bool
     */
    public function canAnnotate($user, $aclLevel)
    {
        return $user !== '' && $user !== null && $aclLevel >= AUTH_READ;
    }

    /**
     * May this user edit or delete the given annotation? Author or admin.
     * Anonymous users never may.
     *
     * @param array  $annotation
     * @param string $user
     * @param bool   $isAdmin
     * @return bool
     */
    public function canEditAnnotation(array $annotation, $user, $isAdmin)
    {
        if ($user === '' || $user === null) {
            return false;
        }
        return $isAdmin || (($annotation['author'] ?? '') === $user);
    }

    /**
     * May this user edit or delete the given reply? Author or admin.
     * Anonymous users never may.
     *
     * @param array  $reply
     * @param string $user
     * @param bool   $isAdmin
     * @return bool
     */
    public function canEditReply(array $reply, $user, $isAdmin)
    {
        if ($user === '' || $user === null) {
            return false;
        }
        return $isAdmin || (($reply['author'] ?? '') === $user);
    }

    /**
     * May this user run the per-page "clear resolved/orphaned" operations?
     * Admins only.
     *
     * @param bool $isAdmin
     * @return bool
     */
    public function canClear($isAdmin)
    {
        return (bool) $isAdmin;
    }

    // ---------------------------------------------------------------------
    //  Input cleaning
    // ---------------------------------------------------------------------

    /**
     * Validate and normalise a raw anchor.
     *
     * The quote is capped at MAX_QUOTE characters. The context slices are
     * capped at MAX_CONTEXT, keeping the END of the prefix and the START of
     * the suffix — the parts adjacent to the quote.
     *
     * @param mixed $anchor
     * @return array|null  the cleaned anchor, or null if unusable
     */
    protected function cleanAnchor($anchor)
    {
        if (!is_array($anchor)) {
            return null;
        }

        $exact = (isset($anchor['exact']) && is_string($anchor['exact']))
            ? $this->normalizeWhitespace($anchor['exact'])
            : '';
        if ($exact === '') {
            return null; // an anchor without quoted text is unusable
        }
        if (mb_strlen($exact) > self::MAX_QUOTE) {
            $exact = mb_substr($exact, 0, self::MAX_QUOTE);
        }

        $prefix = (isset($anchor['prefix']) && is_string($anchor['prefix']))
            ? $this->normalizeWhitespace($anchor['prefix'])
            : '';
        $suffix = (isset($anchor['suffix']) && is_string($anchor['suffix']))
            ? $this->normalizeWhitespace($anchor['suffix'])
            : '';
        if (mb_strlen($prefix) > self::MAX_CONTEXT) {
            $prefix = mb_substr($prefix, -self::MAX_CONTEXT);
        }
        if (mb_strlen($suffix) > self::MAX_CONTEXT) {
            $suffix = mb_substr($suffix, 0, self::MAX_CONTEXT);
        }

        // the offset is only a hint; negative values are clamped to 0
        $start = isset($anchor['start']) ? max(0, (int) $anchor['start']) : 0;

        return [
            'exact'  => $exact,
            'prefix' => $prefix,
            'suffix' => $suffix,
            'start'  => $start,
        ];
    }

    /**
     * Clean an annotation/reply body: a plain-text string, trimmed, with
     * normalised line endings and a length cap. Newlines are kept; the text
     * is escaped by the consumer at render time.
     *
     * @param mixed $body
     * @return string '' when the input is not a string or is blank
     */
    protected function cleanBody($body)
    {
        if (!is_string($body)) {
            return '';
        }
        $body = str_replace("\r\n", "\n", $body);
        $body = str_replace("\r", "\n", $body);
        $body = trim($body);
        if (mb_strlen($body) > self::MAX_BODY) {
            $body = mb_substr($body, 0, self::MAX_BODY);
        }
        return $body;
    }

    /**
     * Collapse every run of whitespace to a single space and trim.
     *
     * @param mixed $text
     * @return string '' when the text cannot be processed (preg_replace()
     *                returns null, e.g. for malformed UTF-8 under /u)
     */
    protected function normalizeWhitespace($text)
    {
        $collapsed = preg_replace('/\s+/u', ' ', (string) $text);
        if (!is_string($collapsed)) {
            return '';
        }
        return trim($collapsed);
    }

    /**
     * A fresh identifier for an annotation or reply.
     *
     * @return string 16 hex characters
     */
    protected function newId()
    {
        return bin2hex(random_bytes(8));
    }
}
659