xref: /plugin/annotations/helper.php (revision 108f92bd856af52ccb9e86517ad03d96f4a9273a)
1<?php
2
3/**
4 * Annotations plugin — storage and data-logic helper.
5 *
6 * This component owns:
7 *
8 *   1. The per-page annotation store. One JSON file per page, obtained via
9 *      metaFN($id, '.annotations'), holding {version, annotations:[...]}.
10 *      JSON and pretty-printed so the files are easy to inspect or back up.
11 *      The page text and the wiki changelog are never touched.
12 *
13 *   2. The text-quote anchor model. Each annotation stores an anchor of
14 *      {exact, prefix, suffix, start} — the quoted text, a short slice of the
15 *      surrounding context on each side (to disambiguate repeated quotes),
16 *      and a character-offset hint. This is the Hypothes.is approach.
17 *
18 *   3. CRUD on annotations and their threaded replies.
19 *
20 *   4. Server-side orphan detection: a page is rendered to plain text and an
21 *      annotation is "orphaned" when its quoted text no longer appears. Used
22 *      by the admin-only per-page "clear orphaned" operation. (The live UI
23 *      also detects orphans client-side for the on-page counter.)
24 *
25 *   5. The permission rules, as the single source of truth. They are pure
26 *      functions: the caller gathers the facts (current user, admin flag, the
27 *      page's ACL level) and passes them in. Because annotations live
28 *      out-of-band, creating one needs only AUTH_READ on the page, never
29 *      AUTH_EDIT — so a group whose page edit access is blocked can still
30 *      annotate.
31 */
32
// refuse to run outside DokuWiki: DOKU_INC is only defined by the wiki itself
if (!defined('DOKU_INC')) {
    die();
}
35
36class helper_plugin_annotations extends DokuWiki_Plugin
37{
38    /** storage schema version, written into each file */
39    const SCHEMA_VERSION = 1;
40
41    /** longest quoted selection stored, in characters */
42    const MAX_QUOTE = 1000;
43
44    /** length of the prefix/suffix context slices, in characters */
45    const MAX_CONTEXT = 64;
46
47    /** longest annotation/reply body, in characters */
48    const MAX_BODY = 10000;
49
50    // ---------------------------------------------------------------------
51    //  Storage
52    // ---------------------------------------------------------------------
53
54    /**
55     * Path of a page's annotation file.
56     *
57     * @param string $id page id
58     * @return string
59     */
60    protected function getFile($id)
61    {
62        return metaFN($id, '.annotations');
63    }
64
65    /**
66     * All annotations stored for a page.
67     *
68     * @param string $id page id
69     * @return array list of annotation arrays (empty if none)
70     */
71    public function getAnnotations($id)
72    {
73        $file = $this->getFile($id);
74        if (!file_exists($file)) {
75            return [];
76        }
77        $raw = io_readFile($file, false);
78        if ($raw === '') {
79            return [];
80        }
81        $data = json_decode($raw, true);
82        if (!is_array($data) || !isset($data['annotations']) || !is_array($data['annotations'])) {
83            return [];
84        }
85        return $data['annotations'];
86    }
87
88    /**
89     * A single annotation by id.
90     *
91     * @param string $id    page id
92     * @param string $annId annotation id
93     * @return array|null
94     */
95    public function getAnnotation($id, $annId)
96    {
97        foreach ($this->getAnnotations($id) as $a) {
98            if (($a['id'] ?? '') === $annId) {
99                return $a;
100            }
101        }
102        return null;
103    }
104
105    /**
106     * Counts for the on-page indicator. The orphan count is deliberately not
107     * here — it depends on the rendered page and is computed client-side.
108     *
109     * @param string $id page id
110     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
111     */
112    public function getStats($id)
113    {
114        return $this->statsFor($this->getAnnotations($id));
115    }
116
117    /**
118     * Counts for the on-page indicator, computed from an already-loaded list.
119     * Split out from getStats() so callers that already hold the annotation
120     * array (e.g. the page-load JSINFO injector, which embeds the same list)
121     * don't re-read the file.
122     *
123     * @param array $annotations annotation list
124     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
125     */
126    public function statsFor(array $annotations)
127    {
128        $open = 0;
129        $resolved = 0;
130        foreach ($annotations as $a) {
131            if (($a['status'] ?? 'open') === 'resolved') {
132                $resolved++;
133            } else {
134                $open++;
135            }
136        }
137        return ['total' => $open + $resolved, 'open' => $open, 'resolved' => $resolved];
138    }
139
140    /**
141     * Write a page's annotation list to disk.
142     *
143     * @param string $id   page id
144     * @param array  $list annotations
145     * @return bool
146     */
147    protected function writeFile($id, array $list)
148    {
149        $payload = [
150            'version'     => self::SCHEMA_VERSION,
151            'annotations' => array_values($list),
152        ];
153        return (bool) io_saveFile(
154            $this->getFile($id),
155            json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES)
156        );
157    }
158
159    /**
160     * Run a modification against a page's annotations under a write lock.
161     *
162     * The modifier receives the annotation list by reference and returns an
163     * outcome value. Returning the boolean false aborts the write (used for
164     * "target not found"); any other value is returned to the caller after a
165     * successful save.
166     *
167     * @param string   $id       page id
168     * @param callable $modifier function(array &$annotations): mixed
169     * @return mixed  the modifier's outcome on success, or false on failure
170     */
171    protected function mutate($id, callable $modifier)
172    {
173        $file = $this->getFile($id);
174        io_lock($file);
175
176        $annotations = $this->getAnnotations($id);
177        $outcome = $modifier($annotations);
178
179        if ($outcome === false) {
180            io_unlock($file);
181            return false;
182        }
183
184        $ok = $this->writeFile($id, $annotations);
185        io_unlock($file);
186        return $ok ? $outcome : false;
187    }
188
189    // ---------------------------------------------------------------------
190    //  Annotation CRUD
191    // ---------------------------------------------------------------------
192
193    /**
194     * Create an annotation.
195     *
196     * @param string $id     page id
197     * @param array  $anchor raw anchor {exact, prefix, suffix, start}
198     * @param string $author username
199     * @param string $body   annotation text
200     * @return array|false  the created annotation, or false on invalid input
201     */
202    public function createAnnotation($id, $anchor, $author, $body)
203    {
204        if ($id === '' || $author === '' || $author === null) {
205            return false;
206        }
207        $body = $this->cleanBody($body);
208        if ($body === '') {
209            return false;
210        }
211        $anchor = $this->cleanAnchor($anchor);
212        if ($anchor === null) {
213            return false;
214        }
215
216        $now = time();
217        $new = [
218            'id'          => $this->newId(),
219            'anchor'      => $anchor,
220            'author'      => $author,
221            'created'     => $now,
222            'modified'    => $now,
223            'body'        => $body,
224            'status'      => 'open',
225            'resolved_by' => '',
226            'resolved_at' => 0,
227            'replies'     => [],
228        ];
229
230        return $this->mutate($id, function (array &$annotations) use ($new) {
231            $annotations[] = $new;
232            return $new;
233        });
234    }
235
236    /**
237     * Edit an annotation's body text.
238     *
239     * @param string $id    page id
240     * @param string $annId annotation id
241     * @param string $body  new text
242     * @return bool
243     */
244    public function updateAnnotationBody($id, $annId, $body)
245    {
246        $body = $this->cleanBody($body);
247        if ($body === '') {
248            return false;
249        }
250        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $body) {
251            foreach ($annotations as $i => $a) {
252                if (($a['id'] ?? '') === $annId) {
253                    $annotations[$i]['body']     = $body;
254                    $annotations[$i]['modified'] = time();
255                    return true;
256                }
257            }
258            return false;
259        });
260    }
261
262    /**
263     * Delete an annotation and all its replies.
264     *
265     * @param string $id    page id
266     * @param string $annId annotation id
267     * @return bool
268     */
269    public function deleteAnnotation($id, $annId)
270    {
271        return (bool) $this->mutate($id, function (array &$annotations) use ($annId) {
272            foreach ($annotations as $i => $a) {
273                if (($a['id'] ?? '') === $annId) {
274                    array_splice($annotations, $i, 1);
275                    return true;
276                }
277            }
278            return false;
279        });
280    }
281
282    /**
283     * Mark an annotation open or resolved.
284     *
285     * @param string $id     page id
286     * @param string $annId  annotation id
287     * @param string $status 'open' or 'resolved'
288     * @param string $actor  username making the change (recorded when resolving)
289     * @return bool
290     */
291    public function setStatus($id, $annId, $status, $actor)
292    {
293        if (!in_array($status, ['open', 'resolved'], true)) {
294            return false;
295        }
296        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $status, $actor) {
297            foreach ($annotations as $i => $a) {
298                if (($a['id'] ?? '') === $annId) {
299                    $annotations[$i]['status'] = $status;
300                    if ($status === 'resolved') {
301                        $annotations[$i]['resolved_by'] = $actor;
302                        $annotations[$i]['resolved_at'] = time();
303                    } else {
304                        $annotations[$i]['resolved_by'] = '';
305                        $annotations[$i]['resolved_at'] = 0;
306                    }
307                    return true;
308                }
309            }
310            return false;
311        });
312    }
313
314    // ---------------------------------------------------------------------
315    //  Reply CRUD
316    // ---------------------------------------------------------------------
317
318    /**
319     * Add a reply to an annotation.
320     *
321     * @param string $id       page id
322     * @param string $annId    annotation id
323     * @param string $author   username
324     * @param string $body     reply text
325     * @param string $parentId id of the reply being replied to, or '' for root-level
326     * @return array|false  the created reply, or false on invalid input
327     */
328    public function addReply($id, $annId, $author, $body, $parentId = '')
329    {
330        if ($author === '' || $author === null) {
331            return false;
332        }
333        $body = $this->cleanBody($body);
334        if ($body === '') {
335            return false;
336        }
337        $now = time();
338        $reply = [
339            'id'       => $this->newId(),
340            'parentId' => preg_replace('/[^a-f0-9]/', '', (string) $parentId),
341            'author'   => $author,
342            'created'  => $now,
343            'modified' => $now,
344            'body'     => $body,
345        ];
346
347        return $this->mutate($id, function (array &$annotations) use ($annId, $reply) {
348            foreach ($annotations as $i => $a) {
349                if (($a['id'] ?? '') === $annId) {
350                    $annotations[$i]['replies'][] = $reply;
351                    return $reply;
352                }
353            }
354            return false;
355        });
356    }
357
358    /**
359     * Edit a reply's body text.
360     *
361     * @param string $id      page id
362     * @param string $annId   annotation id
363     * @param string $replyId reply id
364     * @param string $body    new text
365     * @return bool
366     */
367    public function updateReply($id, $annId, $replyId, $body)
368    {
369        $body = $this->cleanBody($body);
370        if ($body === '') {
371            return false;
372        }
373        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId, $body) {
374            foreach ($annotations as $i => $a) {
375                if (($a['id'] ?? '') !== $annId) {
376                    continue;
377                }
378                foreach (($a['replies'] ?? []) as $j => $r) {
379                    if (($r['id'] ?? '') === $replyId) {
380                        $annotations[$i]['replies'][$j]['body']     = $body;
381                        $annotations[$i]['replies'][$j]['modified'] = time();
382                        return true;
383                    }
384                }
385            }
386            return false;
387        });
388    }
389
390    /**
391     * Delete a reply.
392     *
393     * @param string $id      page id
394     * @param string $annId   annotation id
395     * @param string $replyId reply id
396     * @return bool
397     */
398    public function deleteReply($id, $annId, $replyId)
399    {
400        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId) {
401            foreach ($annotations as $i => $a) {
402                if (($a['id'] ?? '') !== $annId) {
403                    continue;
404                }
405                foreach (($a['replies'] ?? []) as $j => $r) {
406                    if (($r['id'] ?? '') === $replyId) {
407                        array_splice($annotations[$i]['replies'], $j, 1);
408                        return true;
409                    }
410                }
411            }
412            return false;
413        });
414    }
415
416    // ---------------------------------------------------------------------
417    //  Bulk maintenance (admin, per page)
418    // ---------------------------------------------------------------------
419
420    /**
421     * Remove every resolved annotation from a page.
422     *
423     * @param string $id page id
424     * @return int|false number removed, or false on write failure
425     */
426    public function clearResolved($id)
427    {
428        if (empty($this->getAnnotations($id))) {
429            return 0;
430        }
431        return $this->mutate($id, function (array &$annotations) {
432            $before = count($annotations);
433            $annotations = array_values(array_filter($annotations, function ($a) {
434                return ($a['status'] ?? 'open') !== 'resolved';
435            }));
436            return $before - count($annotations);
437        });
438    }
439
440    /**
441     * Remove every orphaned annotation from a page — those whose quoted text
442     * no longer appears in the rendered page. The page is re-checked here, so
443     * this is authoritative regardless of what a client believed.
444     *
445     * @param string $id page id
446     * @return int|false number removed, or false on write failure
447     */
448    public function clearOrphaned($id)
449    {
450        $orphanIds = [];
451        foreach ($this->findOrphaned($id) as $a) {
452            $orphanIds[] = $a['id'];
453        }
454        if (empty($orphanIds)) {
455            return 0;
456        }
457        return $this->mutate($id, function (array &$annotations) use ($orphanIds) {
458            $before = count($annotations);
459            $annotations = array_values(array_filter($annotations, function ($a) use ($orphanIds) {
460                return !in_array($a['id'] ?? '', $orphanIds, true);
461            }));
462            return $before - count($annotations);
463        });
464    }
465
466    // ---------------------------------------------------------------------
467    //  Orphan detection
468    // ---------------------------------------------------------------------
469
470    /**
471     * Render a page to normalised plain text, for quote searching.
472     *
473     * Block-level closing tags become spaces so adjacent blocks do not fuse
474     * into one run of text; then tags are stripped, entities decoded, and
475     * whitespace collapsed — the same normalisation applied to stored quotes.
476     *
477     * @param string $id page id
478     * @return string
479     */
480    public function getPageText($id)
481    {
482        if (!page_exists($id)) {
483            return '';
484        }
485        $xhtml = p_wiki_xhtml($id, '', false);
486        if (!is_string($xhtml) || $xhtml === '') {
487            return '';
488        }
489        $xhtml = preg_replace('#</(p|div|li|h[1-6]|td|th|tr|blockquote|pre|dt|dd)>#i', ' ', $xhtml);
490        $xhtml = preg_replace('#<br\s*/?>#i', ' ', $xhtml);
491        $text  = strip_tags($xhtml);
492        $text  = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
493        return $this->normalizeWhitespace($text);
494    }
495
496    /**
497     * The annotations on a page whose quoted text is no longer present.
498     *
499     * @param string $id page id
500     * @return array list of orphaned annotation arrays
501     */
502    public function findOrphaned($id)
503    {
504        $annotations = $this->getAnnotations($id);
505        if (empty($annotations)) {
506            return [];
507        }
508        $pageText = $this->getPageText($id);
509
510        $orphaned = [];
511        foreach ($annotations as $a) {
512            $exact = $this->normalizeWhitespace($a['anchor']['exact'] ?? '');
513            if ($exact === '' || mb_strpos($pageText, $exact) === false) {
514                $orphaned[] = $a;
515            }
516        }
517        return $orphaned;
518    }
519
520    // ---------------------------------------------------------------------
521    //  Permission rules (single source of truth)
522    // ---------------------------------------------------------------------
523
524    /**
525     * May this user create an annotation, reply, or change a resolve status?
526     *
527     * Requires only read access to the page — annotations are out-of-band, so
528     * a user whose page edit access is blocked may still annotate.
529     *
530     * @param string $user     current username ('' for anonymous)
531     * @param int    $aclLevel the user's ACL level on the page
532     * @return bool
533     */
534    public function canAnnotate($user, $aclLevel)
535    {
536        return $user !== '' && $user !== null && $aclLevel >= AUTH_READ;
537    }
538
539    /**
540     * May this user edit or delete the given annotation? Author or admin.
541     *
542     * @param array  $annotation
543     * @param string $user
544     * @param bool   $isAdmin
545     * @return bool
546     */
547    public function canEditAnnotation(array $annotation, $user, $isAdmin)
548    {
549        if ($user === '' || $user === null) {
550            return false;
551        }
552        return $isAdmin || (($annotation['author'] ?? '') === $user);
553    }
554
555    /**
556     * May this user edit or delete the given reply? Author or admin.
557     *
558     * @param array  $reply
559     * @param string $user
560     * @param bool   $isAdmin
561     * @return bool
562     */
563    public function canEditReply(array $reply, $user, $isAdmin)
564    {
565        if ($user === '' || $user === null) {
566            return false;
567        }
568        return $isAdmin || (($reply['author'] ?? '') === $user);
569    }
570
571    /**
572     * May this user run the per-page "clear resolved/orphaned" operations?
573     * Admins only.
574     *
575     * @param bool $isAdmin
576     * @return bool
577     */
578    public function canClear($isAdmin)
579    {
580        return (bool) $isAdmin;
581    }
582
583    // ---------------------------------------------------------------------
584    //  Input cleaning
585    // ---------------------------------------------------------------------
586
587    /**
588     * Validate and normalise a raw anchor.
589     *
590     * @param mixed $anchor
591     * @return array|null  the cleaned anchor, or null if unusable
592     */
593    protected function cleanAnchor($anchor)
594    {
595        if (!is_array($anchor)) {
596            return null;
597        }
598
599        $exact = (isset($anchor['exact']) && is_string($anchor['exact']))
600            ? $this->normalizeWhitespace($anchor['exact'])
601            : '';
602        if ($exact === '') {
603            return null; // an anchor without quoted text is unusable
604        }
605        if (mb_strlen($exact) > self::MAX_QUOTE) {
606            $exact = mb_substr($exact, 0, self::MAX_QUOTE);
607        }
608
609        $prefix = (isset($anchor['prefix']) && is_string($anchor['prefix']))
610            ? $this->normalizeWhitespace($anchor['prefix'])
611            : '';
612        $suffix = (isset($anchor['suffix']) && is_string($anchor['suffix']))
613            ? $this->normalizeWhitespace($anchor['suffix'])
614            : '';
615        if (mb_strlen($prefix) > self::MAX_CONTEXT) {
616            $prefix = mb_substr($prefix, -self::MAX_CONTEXT);
617        }
618        if (mb_strlen($suffix) > self::MAX_CONTEXT) {
619            $suffix = mb_substr($suffix, 0, self::MAX_CONTEXT);
620        }
621
622        $start = isset($anchor['start']) ? max(0, (int) $anchor['start']) : 0;
623
624        return [
625            'exact'  => $exact,
626            'prefix' => $prefix,
627            'suffix' => $suffix,
628            'start'  => $start,
629        ];
630    }
631
632    /**
633     * Clean an annotation/reply body: a plain-text string, trimmed, with
634     * normalised line endings and a length cap. Newlines are kept; the text
635     * is escaped by the consumer at render time.
636     *
637     * @param mixed $body
638     * @return string
639     */
640    protected function cleanBody($body)
641    {
642        if (!is_string($body)) {
643            return '';
644        }
645        $body = str_replace("\r\n", "\n", $body);
646        $body = str_replace("\r", "\n", $body);
647        $body = trim($body);
648        if (mb_strlen($body) > self::MAX_BODY) {
649            $body = mb_substr($body, 0, self::MAX_BODY);
650        }
651        return $body;
652    }
653
654    /**
655     * Collapse every run of whitespace to a single space and trim.
656     *
657     * @param mixed $text
658     * @return string
659     */
660    protected function normalizeWhitespace($text)
661    {
662        return trim(preg_replace('/\s+/u', ' ', (string) $text));
663    }
664
665    /**
666     * A fresh identifier for an annotation or reply.
667     *
668     * @return string 16 hex characters
669     */
670    protected function newId()
671    {
672        return bin2hex(random_bytes(8));
673    }
674}
675