xref: /plugin/annotations/helper.php (revision 43d2073c014d8cf78420fa47c6568a01e7249305)
1<?php
2
3/**
4 * Annotations plugin — storage and data-logic helper.
5 *
6 * This component owns:
7 *
8 *   1. The per-page annotation store. One JSON file per page, obtained via
9 *      metaFN($id, '.annotations'), holding {version, annotations:[...]}.
10 *      JSON and pretty-printed so the files are easy to inspect or back up.
11 *      The page text and the wiki changelog are never touched.
12 *
13 *   2. The text-quote anchor model. Each annotation stores an anchor of
14 *      {exact, prefix, suffix, start} — the quoted text, a short slice of the
15 *      surrounding context on each side (to disambiguate repeated quotes),
16 *      and a character-offset hint. This is the Hypothes.is approach.
17 *
18 *   3. CRUD on annotations and their threaded replies.
19 *
20 *   4. Server-side orphan detection: a page is rendered to plain text and an
21 *      annotation is "orphaned" when its quoted text no longer appears. Used
22 *      by the admin-only per-page "clear orphaned" operation. (The live UI
23 *      also detects orphans client-side for the on-page counter.)
24 *
25 *   5. The permission rules, as the single source of truth. They are pure
26 *      functions: the caller gathers the facts (current user, admin flag, the
27 *      page's ACL level) and passes them in. Because annotations live
28 *      out-of-band, creating one needs only AUTH_READ on the page, never
29 *      AUTH_EDIT — so a group whose page edit access is blocked can still
30 *      annotate.
31 */
32
33// must be run within DokuWiki
34if (!defined('DOKU_INC')) die();
35
36class helper_plugin_annotations extends DokuWiki_Plugin
37{
38    /** Storage schema version; written as the 'version' field of every annotation file. */
39    const SCHEMA_VERSION = 1;
40
41    /** Longest quoted selection stored, in characters; cleanAnchor() cuts 'exact' down to this. */
42    const MAX_QUOTE = 1000;
43
44    /** Longest prefix/suffix context slice, in characters (the end of the prefix and the start of the suffix are kept). */
45    const MAX_CONTEXT = 64;
46
47    /** Longest annotation/reply body, in characters; cleanBody() cuts longer text down to this. */
48    const MAX_BODY = 10000;
49
50    // ---------------------------------------------------------------------
51    //  Storage
52    // ---------------------------------------------------------------------
53
/**
 * Resolve the location of a page's annotation store.
 *
 * @param string $id page id
 * @return string whatever metaFN() yields for the '.annotations' extension
 */
protected function getFile($id)
{
    $path = metaFN($id, '.annotations');

    return $path;
}
64
/**
 * Load the annotation list stored for a page.
 *
 * A missing file, an empty file, undecodable JSON, or a document without
 * an array under 'annotations' all produce an empty list.
 *
 * @param string $id page id
 * @return array list of annotation arrays (empty if none)
 */
public function getAnnotations($id)
{
    $path = $this->getFile($id);

    // a missing store is treated exactly like an empty one
    $contents = file_exists($path) ? io_readFile($path, false) : '';
    if ($contents === '') {
        return [];
    }

    $decoded = json_decode($contents, true);
    $stored  = is_array($decoded) ? ($decoded['annotations'] ?? null) : null;

    return is_array($stored) ? $stored : [];
}
87
/**
 * Look up one annotation on a page by its identifier.
 *
 * @param string $id    page id
 * @param string $annId annotation id
 * @return array|null the first annotation whose 'id' is identical to $annId, or null
 */
public function getAnnotation($id, $annId)
{
    $all = $this->getAnnotations($id);

    foreach ($all as $candidate) {
        $candidateId = $candidate['id'] ?? '';
        if ($candidateId !== $annId) {
            continue;
        }
        return $candidate;
    }

    return null;
}
104
/**
 * Tally a page's annotations by status for the on-page indicator.
 *
 * Anything not marked 'resolved' (including entries with no status at all)
 * counts as open. Orphans are not counted here.
 *
 * @param string $id page id
 * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
 */
public function getStats($id)
{
    $tally = ['open' => 0, 'resolved' => 0];

    foreach ($this->getAnnotations($id) as $entry) {
        $state  = $entry['status'] ?? 'open';
        $bucket = ($state === 'resolved') ? 'resolved' : 'open';
        $tally[$bucket]++;
    }

    return [
        'total'    => $tally['open'] + $tally['resolved'],
        'open'     => $tally['open'],
        'resolved' => $tally['resolved'],
    ];
}
125
/**
 * Write a page's annotation list to disk as pretty-printed JSON.
 *
 * The list is re-indexed so it is always serialised as a JSON array, and
 * wrapped together with the schema version.
 *
 * @param string $id   page id
 * @param array  $list annotations
 * @return bool true when the file was saved; false when encoding or saving failed
 */
protected function writeFile($id, array $list)
{
    $payload = [
        'version'     => self::SCHEMA_VERSION,
        'annotations' => array_values($list),
    ];

    $json = json_encode($payload, JSON_PRETTY_PRINT);
    if ($json === false) {
        // Encoding can fail (e.g. malformed UTF-8 somewhere in the list).
        // Handing `false` on to io_saveFile() as the content would risk
        // replacing the existing store with an empty file, so abort and
        // leave whatever is on disk untouched.
        return false;
    }

    return (bool) io_saveFile($this->getFile($id), $json);
}
141
/**
 * Run a modification against a page's annotations under a write lock.
 *
 * The modifier receives the annotation list by reference and returns an
 * outcome value. Returning the boolean false aborts the write (used for
 * "target not found"); any other value is returned to the caller after a
 * successful save.
 *
 * The lock is released in a finally block, so it is given back even when
 * the modifier (or the read/write around it) throws.
 *
 * @param string   $id       page id
 * @param callable $modifier function(array &$annotations): mixed
 * @return mixed  the modifier's outcome on success, or false on failure
 */
protected function mutate($id, callable $modifier)
{
    $file = $this->getFile($id);
    io_lock($file);

    try {
        // read inside the lock so the modifier sees the current state
        $annotations = $this->getAnnotations($id);
        $outcome = $modifier($annotations);

        if ($outcome === false) {
            // modifier declined: nothing is written
            return false;
        }

        return $this->writeFile($id, $annotations) ? $outcome : false;
    } finally {
        io_unlock($file);
    }
}
171
172    // ---------------------------------------------------------------------
173    //  Annotation CRUD
174    // ---------------------------------------------------------------------
175
/**
 * Store a new, open annotation on a page.
 *
 * The body and anchor are cleaned first; an empty page id, a missing
 * author, an empty body, or an unusable anchor all reject the request.
 *
 * @param string $id     page id
 * @param array  $anchor raw anchor {exact, prefix, suffix, start}
 * @param string $author username
 * @param string $body   annotation text
 * @return array|false  the created annotation, or false on invalid input or failed save
 */
public function createAnnotation($id, $anchor, $author, $body)
{
    $hasAuthor = ($author !== '' && $author !== null);
    if ($id === '' || !$hasAuthor) {
        return false;
    }

    $text = $this->cleanBody($body);
    if ($text === '') {
        return false;
    }

    $where = $this->cleanAnchor($anchor);
    if ($where === null) {
        return false;
    }

    $stamp  = time();
    $record = [
        'id'          => $this->newId(),
        'anchor'      => $where,
        'author'      => $author,
        'created'     => $stamp,
        'modified'    => $stamp,
        'body'        => $text,
        'status'      => 'open',
        'resolved_by' => '',
        'resolved_at' => 0,
        'replies'     => [],
    ];

    // append under the write lock and hand the stored record back
    $append = function (array &$annotations) use ($record) {
        $annotations[] = $record;
        return $record;
    };

    return $this->mutate($id, $append);
}
218
/**
 * Replace the text of an existing annotation and bump its 'modified' time.
 *
 * @param string $id    page id
 * @param string $annId annotation id
 * @param string $body  new text
 * @return bool false when the cleaned text is empty, the annotation is not found, or saving fails
 */
public function updateAnnotationBody($id, $annId, $body)
{
    $text = $this->cleanBody($body);
    if ($text === '') {
        return false;
    }

    $rewrite = function (array &$annotations) use ($annId, $text) {
        foreach ($annotations as $pos => $entry) {
            if (($entry['id'] ?? '') !== $annId) {
                continue;
            }
            $annotations[$pos]['body']     = $text;
            $annotations[$pos]['modified'] = time();
            return true;
        }
        return false; // no such annotation: abort the write
    };

    return (bool) $this->mutate($id, $rewrite);
}
244
/**
 * Remove an annotation; its replies are stored inside it and go with it.
 *
 * @param string $id    page id
 * @param string $annId annotation id
 * @return bool false when the annotation is not found or saving fails
 */
public function deleteAnnotation($id, $annId)
{
    $drop = function (array &$annotations) use ($annId) {
        foreach ($annotations as $pos => $entry) {
            $entryId = $entry['id'] ?? '';
            if ($entryId !== $annId) {
                continue;
            }
            array_splice($annotations, $pos, 1);
            return true;
        }
        return false; // nothing matched: abort the write
    };

    return (bool) $this->mutate($id, $drop);
}
264
/**
 * Switch an annotation between 'open' and 'resolved'.
 *
 * Resolving records who did it and when; reopening blanks both fields.
 *
 * @param string $id     page id
 * @param string $annId  annotation id
 * @param string $status 'open' or 'resolved'
 * @param string $actor  username making the change (recorded when resolving)
 * @return bool false for any other status, an unknown annotation, or a failed save
 */
public function setStatus($id, $annId, $status, $actor)
{
    if ($status !== 'open' && $status !== 'resolved') {
        return false;
    }

    $resolving = ($status === 'resolved');

    $apply = function (array &$annotations) use ($annId, $status, $actor, $resolving) {
        foreach ($annotations as $pos => $entry) {
            if (($entry['id'] ?? '') !== $annId) {
                continue;
            }
            $annotations[$pos]['status']      = $status;
            $annotations[$pos]['resolved_by'] = $resolving ? $actor : '';
            $annotations[$pos]['resolved_at'] = $resolving ? time() : 0;
            return true;
        }
        return false;
    };

    return (bool) $this->mutate($id, $apply);
}
296
297    // ---------------------------------------------------------------------
298    //  Reply CRUD
299    // ---------------------------------------------------------------------
300
/**
 * Append a reply to an annotation's thread.
 *
 * @param string $id     page id
 * @param string $annId  annotation id
 * @param string $author username
 * @param string $body   reply text
 * @return array|false  the created reply; false on a missing author, an empty
 *                      body, an unknown annotation, or a failed save
 */
public function addReply($id, $annId, $author, $body)
{
    $hasAuthor = ($author !== '' && $author !== null);
    if (!$hasAuthor) {
        return false;
    }

    $text = $this->cleanBody($body);
    if ($text === '') {
        return false;
    }

    $stamp = time();
    $entry = [
        'id'       => $this->newId(),
        'author'   => $author,
        'created'  => $stamp,
        'modified' => $stamp,
        'body'     => $text,
    ];

    $attach = function (array &$annotations) use ($annId, $entry) {
        foreach ($annotations as $pos => $candidate) {
            if (($candidate['id'] ?? '') !== $annId) {
                continue;
            }
            $annotations[$pos]['replies'][] = $entry;
            return $entry;
        }
        return false; // parent annotation not found
    };

    return $this->mutate($id, $attach);
}
338
/**
 * Replace the text of a reply and bump its 'modified' time.
 *
 * @param string $id      page id
 * @param string $annId   annotation id
 * @param string $replyId reply id
 * @param string $body    new text
 * @return bool false when the cleaned text is empty, the reply is not found, or saving fails
 */
public function updateReply($id, $annId, $replyId, $body)
{
    $text = $this->cleanBody($body);
    if ($text === '') {
        return false;
    }

    $rewrite = function (array &$annotations) use ($annId, $replyId, $text) {
        foreach ($annotations as $pos => $entry) {
            if (($entry['id'] ?? '') !== $annId) {
                continue;
            }
            $thread = $entry['replies'] ?? [];
            foreach ($thread as $slot => $reply) {
                if (($reply['id'] ?? '') !== $replyId) {
                    continue;
                }
                $annotations[$pos]['replies'][$slot]['body']     = $text;
                $annotations[$pos]['replies'][$slot]['modified'] = time();
                return true;
            }
        }
        return false;
    };

    return (bool) $this->mutate($id, $rewrite);
}
370
/**
 * Remove a single reply from an annotation's thread.
 *
 * @param string $id      page id
 * @param string $annId   annotation id
 * @param string $replyId reply id
 * @return bool false when the reply is not found or saving fails
 */
public function deleteReply($id, $annId, $replyId)
{
    $drop = function (array &$annotations) use ($annId, $replyId) {
        foreach ($annotations as $pos => $entry) {
            if (($entry['id'] ?? '') !== $annId) {
                continue;
            }
            $thread = $entry['replies'] ?? [];
            foreach ($thread as $slot => $reply) {
                if (($reply['id'] ?? '') !== $replyId) {
                    continue;
                }
                array_splice($annotations[$pos]['replies'], $slot, 1);
                return true;
            }
        }
        return false;
    };

    return (bool) $this->mutate($id, $drop);
}
396
397    // ---------------------------------------------------------------------
398    //  Bulk maintenance (admin, per page)
399    // ---------------------------------------------------------------------
400
/**
 * Drop every annotation marked 'resolved' from a page.
 *
 * A page with no annotations is left alone and reports 0.
 *
 * @param string $id page id
 * @return int|false number removed, or false on write failure
 */
public function clearResolved($id)
{
    $current = $this->getAnnotations($id);
    if (empty($current)) {
        return 0;
    }

    return $this->mutate($id, function (array &$annotations) {
        $kept = [];
        foreach ($annotations as $entry) {
            if (($entry['status'] ?? 'open') !== 'resolved') {
                $kept[] = $entry;
            }
        }
        $removed     = count($annotations) - count($kept);
        $annotations = $kept;
        return $removed;
    });
}
420
/**
 * Remove every orphaned annotation from a page — those whose quoted text
 * no longer appears in the rendered page. The page is re-checked here, so
 * this is authoritative regardless of what a client believed.
 *
 * Orphans are collected by id first and then removed by id under the write
 * lock. An orphan that carries no 'id' cannot be targeted that way and is
 * left in place.
 *
 * @param string $id page id
 * @return int|false number removed, or false on write failure
 */
public function clearOrphaned($id)
{
    $orphanIds = [];
    foreach ($this->findOrphaned($id) as $a) {
        // Guarded like every other id access in this class: reading a
        // missing key would raise a warning and push a null that the
        // strict comparison below could never match anyway.
        if (isset($a['id'])) {
            $orphanIds[] = $a['id'];
        }
    }
    if (empty($orphanIds)) {
        return 0;
    }

    return $this->mutate($id, function (array &$annotations) use ($orphanIds) {
        $before = count($annotations);
        $annotations = array_values(array_filter($annotations, function ($a) use ($orphanIds) {
            return !in_array($a['id'] ?? '', $orphanIds, true);
        }));
        return $before - count($annotations);
    });
}
446
447    // ---------------------------------------------------------------------
448    //  Orphan detection
449    // ---------------------------------------------------------------------
450
/**
 * Produce the normalised plain text of a rendered page, for quote searching.
 *
 * Closing tags of block-level elements and <br> are turned into spaces
 * before the markup is stripped, so neighbouring blocks stay separated.
 * Entities are then decoded and whitespace collapsed, matching the
 * normalisation applied to stored quotes.
 *
 * @param string $id page id
 * @return string '' when the page does not exist or renders to nothing
 */
public function getPageText($id)
{
    if (!page_exists($id)) {
        return '';
    }

    $markup = p_wiki_xhtml($id, '', false);
    if (!is_string($markup) || $markup === '') {
        return '';
    }

    $blockClose = '#</(p|div|li|h[1-6]|td|th|tr|blockquote|pre|dt|dd)>#i';
    $lineBreak  = '#<br\s*/?>#i';

    $markup = preg_replace($blockClose, ' ', $markup);
    $markup = preg_replace($lineBreak, ' ', $markup);

    $plain = html_entity_decode(strip_tags($markup), ENT_QUOTES | ENT_HTML5, 'UTF-8');

    return $this->normalizeWhitespace($plain);
}
476
/**
 * List the annotations whose quoted text cannot be found in the page.
 *
 * An annotation with an empty (or missing) quote is always reported as
 * orphaned. The page is only rendered when there is something to check.
 *
 * @param string $id page id
 * @return array list of orphaned annotation arrays
 */
public function findOrphaned($id)
{
    $all = $this->getAnnotations($id);
    if (empty($all)) {
        return [];
    }

    $haystack = $this->getPageText($id);

    $isOrphan = function ($entry) use ($haystack) {
        $quote = $this->normalizeWhitespace($entry['anchor']['exact'] ?? '');
        if ($quote === '') {
            return true;
        }
        return mb_strpos($haystack, $quote) === false;
    };

    return array_values(array_filter($all, $isOrphan));
}
500
501    // ---------------------------------------------------------------------
502    //  Permission rules (single source of truth)
503    // ---------------------------------------------------------------------
504
/**
 * Decide whether a user may create an annotation, reply, or change a
 * resolve status.
 *
 * Only a logged-in user with at least read access qualifies; edit access
 * to the page is not needed.
 *
 * @param string $user     current username ('' for anonymous)
 * @param int    $aclLevel the user's ACL level on the page
 * @return bool
 */
public function canAnnotate($user, $aclLevel)
{
    if ($user === '' || $user === null) {
        return false; // anonymous
    }

    return $aclLevel >= AUTH_READ;
}
519
/**
 * Decide whether a user may edit or delete an annotation.
 *
 * Anonymous users never may; otherwise admins always may, and everyone
 * else only when they are the recorded author.
 *
 * @param array  $annotation
 * @param string $user
 * @param bool   $isAdmin
 * @return bool
 */
public function canEditAnnotation(array $annotation, $user, $isAdmin)
{
    $anonymous = ($user === '' || $user === null);
    if ($anonymous) {
        return false;
    }
    if ($isAdmin) {
        return true;
    }

    $owner = $annotation['author'] ?? '';

    return $owner === $user;
}
535
/**
 * Decide whether a user may edit or delete a reply.
 *
 * Same rule as for annotations: never for anonymous users, always for
 * admins, otherwise only for the reply's recorded author.
 *
 * @param array  $reply
 * @param string $user
 * @param bool   $isAdmin
 * @return bool
 */
public function canEditReply(array $reply, $user, $isAdmin)
{
    $loggedIn = !($user === '' || $user === null);
    if (!$loggedIn) {
        return false;
    }

    $isAuthor = (($reply['author'] ?? '') === $user);

    return ($isAdmin || $isAuthor) ? true : false;
}
551
/**
 * Decide whether the per-page "clear resolved/orphaned" operations may be
 * run. Only the admin flag matters.
 *
 * @param bool $isAdmin
 * @return bool the flag, coerced to a strict boolean
 */
public function canClear($isAdmin)
{
    return $isAdmin ? true : false;
}
563
564    // ---------------------------------------------------------------------
565    //  Input cleaning
566    // ---------------------------------------------------------------------
567
/**
 * Turn a raw anchor into the stored {exact, prefix, suffix, start} shape.
 *
 * Text fields that are missing or not strings become ''. The quote is
 * whitespace-normalised and capped at MAX_QUOTE; the context slices are
 * capped at MAX_CONTEXT, keeping the end of the prefix and the beginning
 * of the suffix (the parts nearest the quote). A negative start is
 * raised to 0.
 *
 * @param mixed $anchor
 * @return array|null  the cleaned anchor, or null if it is not an array or has no quoted text
 */
protected function cleanAnchor($anchor)
{
    if (!is_array($anchor)) {
        return null;
    }

    // pull one text field out of the raw anchor, normalised
    $textField = function ($key) use ($anchor) {
        if (isset($anchor[$key]) && is_string($anchor[$key])) {
            return $this->normalizeWhitespace($anchor[$key]);
        }
        return '';
    };

    $quote = $textField('exact');
    if ($quote === '') {
        return null; // nothing to anchor to
    }
    if (mb_strlen($quote) > self::MAX_QUOTE) {
        $quote = mb_substr($quote, 0, self::MAX_QUOTE);
    }

    $before = $textField('prefix');
    if (mb_strlen($before) > self::MAX_CONTEXT) {
        $before = mb_substr($before, -self::MAX_CONTEXT);
    }

    $after = $textField('suffix');
    if (mb_strlen($after) > self::MAX_CONTEXT) {
        $after = mb_substr($after, 0, self::MAX_CONTEXT);
    }

    $offset = 0;
    if (isset($anchor['start'])) {
        $offset = max(0, (int) $anchor['start']);
    }

    return [
        'exact'  => $quote,
        'prefix' => $before,
        'suffix' => $after,
        'start'  => $offset,
    ];
}
612
/**
 * Sanitise an annotation/reply body.
 *
 * Non-strings become ''. Line endings are unified to "\n", surrounding
 * whitespace is trimmed, and the result is capped at MAX_BODY characters.
 * Inner newlines survive; no escaping happens here.
 *
 * @param mixed $body
 * @return string
 */
protected function cleanBody($body)
{
    if (!is_string($body)) {
        return '';
    }

    // "\r\n" is handled before the lone "\r" so it yields a single "\n"
    $text = trim(str_replace(["\r\n", "\r"], "\n", $body));

    return (mb_strlen($text) > self::MAX_BODY)
        ? mb_substr($text, 0, self::MAX_BODY)
        : $text;
}
634
/**
 * Collapse every run of whitespace to a single space and trim.
 *
 * The pattern runs in UTF-8 mode, where preg_replace() returns null for
 * input that is not valid UTF-8. Rather than letting that turn the whole
 * text into '' (which would make every quote on a page look orphaned),
 * invalid byte sequences are replaced and the collapse is retried.
 *
 * @param mixed $text
 * @return string
 */
protected function normalizeWhitespace($text)
{
    $text = (string) $text;

    $collapsed = preg_replace('/\s+/u', ' ', $text);
    if ($collapsed === null) {
        // Malformed UTF-8: re-encoding UTF-8 to UTF-8 swaps the invalid
        // sequences for mbstring's substitute character.
        $text      = mb_convert_encoding($text, 'UTF-8', 'UTF-8');
        $collapsed = preg_replace('/\s+/u', ' ', $text);
    }

    // still null only if the regex engine itself failed; never pass null to trim()
    return trim((string) $collapsed);
}
645
/**
 * Generate an identifier for a new annotation or reply.
 *
 * @return string 16 hex characters (8 random bytes, hex-encoded)
 */
protected function newId()
{
    $entropy = random_bytes(8);

    return bin2hex($entropy);
}
655}
656