xref: /plugin/annotations/helper.php (revision ee9dbf1506bc8a2e17701b4e3c1bc1caf77e1561)
1<?php
2
3/**
4 * Annotations plugin — storage and data-logic helper.
5 *
6 * This component owns:
7 *
8 *   1. The per-page annotation store. One JSON file per page, obtained via
9 *      metaFN($id, '.annotations'), holding {version, annotations:[...]}.
10 *      JSON and pretty-printed so the files are easy to inspect or back up.
11 *      The page text and the wiki changelog are never touched.
12 *
13 *   2. The text-quote anchor model. Each annotation stores an anchor of
14 *      {exact, prefix, suffix, start} — the quoted text, a short slice of the
15 *      surrounding context on each side (to disambiguate repeated quotes),
16 *      and a character-offset hint. This is the Hypothes.is approach.
17 *
18 *   3. CRUD on annotations and their threaded replies.
19 *
20 *   4. Server-side orphan detection: a page is rendered to plain text and an
21 *      annotation is "orphaned" when its quoted text no longer appears. Used
22 *      by the admin-only per-page "clear orphaned" operation. (The live UI
23 *      also detects orphans client-side for the on-page counter.)
24 *
25 *   5. The permission rules, as the single source of truth. They are pure
26 *      functions: the caller gathers the facts (current user, admin flag, the
27 *      page's ACL level) and passes them in. Because annotations live
28 *      out-of-band, creating one needs only AUTH_READ on the page, never
29 *      AUTH_EDIT — so a group whose page edit access is blocked can still
30 *      annotate.
31 */
32
33// must be run within DokuWiki
34if (!defined('DOKU_INC')) die();
35
36class helper_plugin_annotations extends DokuWiki_Plugin
37{
38    /** storage schema version, written into each file */
39    const SCHEMA_VERSION = 1;
40
41    /** longest quoted selection stored, in characters */
42    const MAX_QUOTE = 1000;
43
44    /** length of the prefix/suffix context slices, in characters */
45    const MAX_CONTEXT = 64;
46
47    /** longest annotation/reply body, in characters */
48    const MAX_BODY = 10000;
49
50    // ---------------------------------------------------------------------
51    //  Storage
52    // ---------------------------------------------------------------------
53
54    /**
55     * Path of a page's annotation file.
56     *
57     * @param string $id page id
58     * @return string
59     */
60    protected function getFile($id)
61    {
62        return metaFN($id, '.annotations');
63    }
64
65    /**
66     * All annotations stored for a page.
67     *
68     * @param string $id page id
69     * @return array list of annotation arrays (empty if none)
70     */
71    public function getAnnotations($id)
72    {
73        $file = $this->getFile($id);
74        if (!file_exists($file)) {
75            return [];
76        }
77        $raw = io_readFile($file, false);
78        if ($raw === '') {
79            return [];
80        }
81        $data = json_decode($raw, true);
82        if (!is_array($data) || !isset($data['annotations']) || !is_array($data['annotations'])) {
83            return [];
84        }
85        return $data['annotations'];
86    }
87
88    /**
89     * A single annotation by id.
90     *
91     * @param string $id    page id
92     * @param string $annId annotation id
93     * @return array|null
94     */
95    public function getAnnotation($id, $annId)
96    {
97        foreach ($this->getAnnotations($id) as $a) {
98            if (($a['id'] ?? '') === $annId) {
99                return $a;
100            }
101        }
102        return null;
103    }
104
105    /**
106     * Counts for the on-page indicator. The orphan count is deliberately not
107     * here — it depends on the rendered page and is computed client-side.
108     *
109     * @param string $id page id
110     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
111     */
112    public function getStats($id)
113    {
114        $open = 0;
115        $resolved = 0;
116        foreach ($this->getAnnotations($id) as $a) {
117            if (($a['status'] ?? 'open') === 'resolved') {
118                $resolved++;
119            } else {
120                $open++;
121            }
122        }
123        return ['total' => $open + $resolved, 'open' => $open, 'resolved' => $resolved];
124    }
125
126    /**
127     * Write a page's annotation list to disk.
128     *
129     * @param string $id   page id
130     * @param array  $list annotations
131     * @return bool
132     */
133    protected function writeFile($id, array $list)
134    {
135        $payload = [
136            'version'     => self::SCHEMA_VERSION,
137            'annotations' => array_values($list),
138        ];
139        return (bool) io_saveFile(
140            $this->getFile($id),
141            json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES)
142        );
143    }
144
145    /**
146     * Run a modification against a page's annotations under a write lock.
147     *
148     * The modifier receives the annotation list by reference and returns an
149     * outcome value. Returning the boolean false aborts the write (used for
150     * "target not found"); any other value is returned to the caller after a
151     * successful save.
152     *
153     * @param string   $id       page id
154     * @param callable $modifier function(array &$annotations): mixed
155     * @return mixed  the modifier's outcome on success, or false on failure
156     */
157    protected function mutate($id, callable $modifier)
158    {
159        $file = $this->getFile($id);
160        io_lock($file);
161
162        $annotations = $this->getAnnotations($id);
163        $outcome = $modifier($annotations);
164
165        if ($outcome === false) {
166            io_unlock($file);
167            return false;
168        }
169
170        $ok = $this->writeFile($id, $annotations);
171        io_unlock($file);
172        return $ok ? $outcome : false;
173    }
174
175    // ---------------------------------------------------------------------
176    //  Annotation CRUD
177    // ---------------------------------------------------------------------
178
179    /**
180     * Create an annotation.
181     *
182     * @param string $id     page id
183     * @param array  $anchor raw anchor {exact, prefix, suffix, start}
184     * @param string $author username
185     * @param string $body   annotation text
186     * @return array|false  the created annotation, or false on invalid input
187     */
188    public function createAnnotation($id, $anchor, $author, $body)
189    {
190        if ($id === '' || $author === '' || $author === null) {
191            return false;
192        }
193        $body = $this->cleanBody($body);
194        if ($body === '') {
195            return false;
196        }
197        $anchor = $this->cleanAnchor($anchor);
198        if ($anchor === null) {
199            return false;
200        }
201
202        $now = time();
203        $new = [
204            'id'          => $this->newId(),
205            'anchor'      => $anchor,
206            'author'      => $author,
207            'created'     => $now,
208            'modified'    => $now,
209            'body'        => $body,
210            'status'      => 'open',
211            'resolved_by' => '',
212            'resolved_at' => 0,
213            'replies'     => [],
214        ];
215
216        return $this->mutate($id, function (array &$annotations) use ($new) {
217            $annotations[] = $new;
218            return $new;
219        });
220    }
221
222    /**
223     * Edit an annotation's body text.
224     *
225     * @param string $id    page id
226     * @param string $annId annotation id
227     * @param string $body  new text
228     * @return bool
229     */
230    public function updateAnnotationBody($id, $annId, $body)
231    {
232        $body = $this->cleanBody($body);
233        if ($body === '') {
234            return false;
235        }
236        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $body) {
237            foreach ($annotations as $i => $a) {
238                if (($a['id'] ?? '') === $annId) {
239                    $annotations[$i]['body']     = $body;
240                    $annotations[$i]['modified'] = time();
241                    return true;
242                }
243            }
244            return false;
245        });
246    }
247
248    /**
249     * Delete an annotation and all its replies.
250     *
251     * @param string $id    page id
252     * @param string $annId annotation id
253     * @return bool
254     */
255    public function deleteAnnotation($id, $annId)
256    {
257        return (bool) $this->mutate($id, function (array &$annotations) use ($annId) {
258            foreach ($annotations as $i => $a) {
259                if (($a['id'] ?? '') === $annId) {
260                    array_splice($annotations, $i, 1);
261                    return true;
262                }
263            }
264            return false;
265        });
266    }
267
268    /**
269     * Mark an annotation open or resolved.
270     *
271     * @param string $id     page id
272     * @param string $annId  annotation id
273     * @param string $status 'open' or 'resolved'
274     * @param string $actor  username making the change (recorded when resolving)
275     * @return bool
276     */
277    public function setStatus($id, $annId, $status, $actor)
278    {
279        if (!in_array($status, ['open', 'resolved'], true)) {
280            return false;
281        }
282        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $status, $actor) {
283            foreach ($annotations as $i => $a) {
284                if (($a['id'] ?? '') === $annId) {
285                    $annotations[$i]['status'] = $status;
286                    if ($status === 'resolved') {
287                        $annotations[$i]['resolved_by'] = $actor;
288                        $annotations[$i]['resolved_at'] = time();
289                    } else {
290                        $annotations[$i]['resolved_by'] = '';
291                        $annotations[$i]['resolved_at'] = 0;
292                    }
293                    return true;
294                }
295            }
296            return false;
297        });
298    }
299
300    // ---------------------------------------------------------------------
301    //  Reply CRUD
302    // ---------------------------------------------------------------------
303
304    /**
305     * Add a reply to an annotation.
306     *
307     * @param string $id       page id
308     * @param string $annId    annotation id
309     * @param string $author   username
310     * @param string $body     reply text
311     * @param string $parentId id of the reply being replied to, or '' for root-level
312     * @return array|false  the created reply, or false on invalid input
313     */
314    public function addReply($id, $annId, $author, $body, $parentId = '')
315    {
316        if ($author === '' || $author === null) {
317            return false;
318        }
319        $body = $this->cleanBody($body);
320        if ($body === '') {
321            return false;
322        }
323        $now = time();
324        $reply = [
325            'id'       => $this->newId(),
326            'parentId' => preg_replace('/[^a-f0-9]/', '', (string) $parentId),
327            'author'   => $author,
328            'created'  => $now,
329            'modified' => $now,
330            'body'     => $body,
331        ];
332
333        return $this->mutate($id, function (array &$annotations) use ($annId, $reply) {
334            foreach ($annotations as $i => $a) {
335                if (($a['id'] ?? '') === $annId) {
336                    $annotations[$i]['replies'][] = $reply;
337                    return $reply;
338                }
339            }
340            return false;
341        });
342    }
343
344    /**
345     * Edit a reply's body text.
346     *
347     * @param string $id      page id
348     * @param string $annId   annotation id
349     * @param string $replyId reply id
350     * @param string $body    new text
351     * @return bool
352     */
353    public function updateReply($id, $annId, $replyId, $body)
354    {
355        $body = $this->cleanBody($body);
356        if ($body === '') {
357            return false;
358        }
359        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId, $body) {
360            foreach ($annotations as $i => $a) {
361                if (($a['id'] ?? '') !== $annId) {
362                    continue;
363                }
364                foreach (($a['replies'] ?? []) as $j => $r) {
365                    if (($r['id'] ?? '') === $replyId) {
366                        $annotations[$i]['replies'][$j]['body']     = $body;
367                        $annotations[$i]['replies'][$j]['modified'] = time();
368                        return true;
369                    }
370                }
371            }
372            return false;
373        });
374    }
375
376    /**
377     * Delete a reply.
378     *
379     * @param string $id      page id
380     * @param string $annId   annotation id
381     * @param string $replyId reply id
382     * @return bool
383     */
384    public function deleteReply($id, $annId, $replyId)
385    {
386        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId) {
387            foreach ($annotations as $i => $a) {
388                if (($a['id'] ?? '') !== $annId) {
389                    continue;
390                }
391                foreach (($a['replies'] ?? []) as $j => $r) {
392                    if (($r['id'] ?? '') === $replyId) {
393                        array_splice($annotations[$i]['replies'], $j, 1);
394                        return true;
395                    }
396                }
397            }
398            return false;
399        });
400    }
401
402    // ---------------------------------------------------------------------
403    //  Bulk maintenance (admin, per page)
404    // ---------------------------------------------------------------------
405
406    /**
407     * Remove every resolved annotation from a page.
408     *
409     * @param string $id page id
410     * @return int|false number removed, or false on write failure
411     */
412    public function clearResolved($id)
413    {
414        if (empty($this->getAnnotations($id))) {
415            return 0;
416        }
417        return $this->mutate($id, function (array &$annotations) {
418            $before = count($annotations);
419            $annotations = array_values(array_filter($annotations, function ($a) {
420                return ($a['status'] ?? 'open') !== 'resolved';
421            }));
422            return $before - count($annotations);
423        });
424    }
425
426    /**
427     * Remove every orphaned annotation from a page — those whose quoted text
428     * no longer appears in the rendered page. The page is re-checked here, so
429     * this is authoritative regardless of what a client believed.
430     *
431     * @param string $id page id
432     * @return int|false number removed, or false on write failure
433     */
434    public function clearOrphaned($id)
435    {
436        $orphanIds = [];
437        foreach ($this->findOrphaned($id) as $a) {
438            $orphanIds[] = $a['id'];
439        }
440        if (empty($orphanIds)) {
441            return 0;
442        }
443        return $this->mutate($id, function (array &$annotations) use ($orphanIds) {
444            $before = count($annotations);
445            $annotations = array_values(array_filter($annotations, function ($a) use ($orphanIds) {
446                return !in_array($a['id'] ?? '', $orphanIds, true);
447            }));
448            return $before - count($annotations);
449        });
450    }
451
452    // ---------------------------------------------------------------------
453    //  Orphan detection
454    // ---------------------------------------------------------------------
455
456    /**
457     * Render a page to normalised plain text, for quote searching.
458     *
459     * Block-level closing tags become spaces so adjacent blocks do not fuse
460     * into one run of text; then tags are stripped, entities decoded, and
461     * whitespace collapsed — the same normalisation applied to stored quotes.
462     *
463     * @param string $id page id
464     * @return string
465     */
466    public function getPageText($id)
467    {
468        if (!page_exists($id)) {
469            return '';
470        }
471        $xhtml = p_wiki_xhtml($id, '', false);
472        if (!is_string($xhtml) || $xhtml === '') {
473            return '';
474        }
475        $xhtml = preg_replace('#</(p|div|li|h[1-6]|td|th|tr|blockquote|pre|dt|dd)>#i', ' ', $xhtml);
476        $xhtml = preg_replace('#<br\s*/?>#i', ' ', $xhtml);
477        $text  = strip_tags($xhtml);
478        $text  = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
479        return $this->normalizeWhitespace($text);
480    }
481
482    /**
483     * The annotations on a page whose quoted text is no longer present.
484     *
485     * @param string $id page id
486     * @return array list of orphaned annotation arrays
487     */
488    public function findOrphaned($id)
489    {
490        $annotations = $this->getAnnotations($id);
491        if (empty($annotations)) {
492            return [];
493        }
494        $pageText = $this->getPageText($id);
495
496        $orphaned = [];
497        foreach ($annotations as $a) {
498            $exact = $this->normalizeWhitespace($a['anchor']['exact'] ?? '');
499            if ($exact === '' || mb_strpos($pageText, $exact) === false) {
500                $orphaned[] = $a;
501            }
502        }
503        return $orphaned;
504    }
505
506    // ---------------------------------------------------------------------
507    //  Permission rules (single source of truth)
508    // ---------------------------------------------------------------------
509
510    /**
511     * May this user create an annotation, reply, or change a resolve status?
512     *
513     * Requires only read access to the page — annotations are out-of-band, so
514     * a user whose page edit access is blocked may still annotate.
515     *
516     * @param string $user     current username ('' for anonymous)
517     * @param int    $aclLevel the user's ACL level on the page
518     * @return bool
519     */
520    public function canAnnotate($user, $aclLevel)
521    {
522        return $user !== '' && $user !== null && $aclLevel >= AUTH_READ;
523    }
524
525    /**
526     * May this user edit or delete the given annotation? Author or admin.
527     *
528     * @param array  $annotation
529     * @param string $user
530     * @param bool   $isAdmin
531     * @return bool
532     */
533    public function canEditAnnotation(array $annotation, $user, $isAdmin)
534    {
535        if ($user === '' || $user === null) {
536            return false;
537        }
538        return $isAdmin || (($annotation['author'] ?? '') === $user);
539    }
540
541    /**
542     * May this user edit or delete the given reply? Author or admin.
543     *
544     * @param array  $reply
545     * @param string $user
546     * @param bool   $isAdmin
547     * @return bool
548     */
549    public function canEditReply(array $reply, $user, $isAdmin)
550    {
551        if ($user === '' || $user === null) {
552            return false;
553        }
554        return $isAdmin || (($reply['author'] ?? '') === $user);
555    }
556
557    /**
558     * May this user run the per-page "clear resolved/orphaned" operations?
559     * Admins only.
560     *
561     * @param bool $isAdmin
562     * @return bool
563     */
564    public function canClear($isAdmin)
565    {
566        return (bool) $isAdmin;
567    }
568
569    // ---------------------------------------------------------------------
570    //  Input cleaning
571    // ---------------------------------------------------------------------
572
573    /**
574     * Validate and normalise a raw anchor.
575     *
576     * @param mixed $anchor
577     * @return array|null  the cleaned anchor, or null if unusable
578     */
579    protected function cleanAnchor($anchor)
580    {
581        if (!is_array($anchor)) {
582            return null;
583        }
584
585        $exact = (isset($anchor['exact']) && is_string($anchor['exact']))
586            ? $this->normalizeWhitespace($anchor['exact'])
587            : '';
588        if ($exact === '') {
589            return null; // an anchor without quoted text is unusable
590        }
591        if (mb_strlen($exact) > self::MAX_QUOTE) {
592            $exact = mb_substr($exact, 0, self::MAX_QUOTE);
593        }
594
595        $prefix = (isset($anchor['prefix']) && is_string($anchor['prefix']))
596            ? $this->normalizeWhitespace($anchor['prefix'])
597            : '';
598        $suffix = (isset($anchor['suffix']) && is_string($anchor['suffix']))
599            ? $this->normalizeWhitespace($anchor['suffix'])
600            : '';
601        if (mb_strlen($prefix) > self::MAX_CONTEXT) {
602            $prefix = mb_substr($prefix, -self::MAX_CONTEXT);
603        }
604        if (mb_strlen($suffix) > self::MAX_CONTEXT) {
605            $suffix = mb_substr($suffix, 0, self::MAX_CONTEXT);
606        }
607
608        $start = isset($anchor['start']) ? max(0, (int) $anchor['start']) : 0;
609
610        return [
611            'exact'  => $exact,
612            'prefix' => $prefix,
613            'suffix' => $suffix,
614            'start'  => $start,
615        ];
616    }
617
618    /**
619     * Clean an annotation/reply body: a plain-text string, trimmed, with
620     * normalised line endings and a length cap. Newlines are kept; the text
621     * is escaped by the consumer at render time.
622     *
623     * @param mixed $body
624     * @return string
625     */
626    protected function cleanBody($body)
627    {
628        if (!is_string($body)) {
629            return '';
630        }
631        $body = str_replace("\r\n", "\n", $body);
632        $body = str_replace("\r", "\n", $body);
633        $body = trim($body);
634        if (mb_strlen($body) > self::MAX_BODY) {
635            $body = mb_substr($body, 0, self::MAX_BODY);
636        }
637        return $body;
638    }
639
640    /**
641     * Collapse every run of whitespace to a single space and trim.
642     *
643     * @param mixed $text
644     * @return string
645     */
646    protected function normalizeWhitespace($text)
647    {
648        return trim(preg_replace('/\s+/u', ' ', (string) $text));
649    }
650
651    /**
652     * A fresh identifier for an annotation or reply.
653     *
654     * @return string 16 hex characters
655     */
656    protected function newId()
657    {
658        return bin2hex(random_bytes(8));
659    }
660}
661