xref: /plugin/annotations/helper.php (revision 72d60f2d94b24cb66fabf596a2ec440f459ba88f)
1<?php
2
3/**
4 * Annotations plugin — storage and data-logic helper.
5 *
6 * This component owns:
7 *
8 *   1. The per-page annotation store. One JSON file per page, obtained via
9 *      metaFN($id, '.annotations'), holding {version, annotations:[...]}.
10 *      JSON and pretty-printed so the files are easy to inspect or back up.
11 *      The page text and the wiki changelog are never touched.
12 *
13 *   2. The text-quote anchor model. Each annotation stores an anchor of
14 *      {exact, prefix, suffix, start} — the quoted text, a short slice of the
15 *      surrounding context on each side (to disambiguate repeated quotes),
16 *      and a character-offset hint. This is the Hypothes.is approach.
17 *
18 *   3. CRUD on annotations and their threaded replies.
19 *
20 *   4. Server-side orphan detection: a page is rendered to plain text and an
21 *      annotation is "orphaned" when its quoted text no longer appears. Used
22 *      by the admin-only per-page "clear orphaned" operation. (The live UI
23 *      also detects orphans client-side for the on-page counter.)
24 *
25 *   5. The permission rules, as the single source of truth. They are pure
26 *      functions: the caller gathers the facts (current user, admin flag, the
27 *      page's ACL level) and passes them in. Because annotations live
28 *      out-of-band, creating one needs only AUTH_READ on the page, never
29 *      AUTH_EDIT — so a group whose page edit access is blocked can still
30 *      annotate.
31 */
32
33// must be run within DokuWiki
34if (!defined('DOKU_INC')) die();
35
36class helper_plugin_annotations extends DokuWiki_Plugin
37{
38    /** storage schema version, written into each file */
39    const SCHEMA_VERSION = 1;
40
41    /** longest quoted selection stored, in characters */
42    const MAX_QUOTE = 1000;
43
44    /** length of the prefix/suffix context slices, in characters (config fallback) */
45    const DEFAULT_CONTEXT = 64;
46
47    /** longest annotation/reply body, in characters (config fallback) */
48    const DEFAULT_BODY = 10000;
49
50    /**
51     * Configured length of each prefix/suffix context slice, in characters.
52     *
53     * @return int
54     */
55    protected function contextLength()
56    {
57        $v = (int) $this->getConf('context_length');
58        return $v >= 0 ? $v : self::DEFAULT_CONTEXT;
59    }
60
61    /**
62     * Configured maximum annotation/reply body length, in characters.
63     *
64     * @return int
65     */
66    protected function bodyCap()
67    {
68        $v = (int) $this->getConf('body_cap');
69        return $v > 0 ? $v : self::DEFAULT_BODY;
70    }
71
72    // ---------------------------------------------------------------------
73    //  Storage
74    // ---------------------------------------------------------------------
75
76    /**
77     * Path of a page's annotation file.
78     *
79     * @param string $id page id
80     * @return string
81     */
82    protected function getFile($id)
83    {
84        return metaFN($id, '.annotations');
85    }
86
87    /**
88     * All annotations stored for a page.
89     *
90     * @param string $id page id
91     * @return array list of annotation arrays (empty if none)
92     */
93    public function getAnnotations($id)
94    {
95        $file = $this->getFile($id);
96        if (!file_exists($file)) {
97            return [];
98        }
99        $raw = io_readFile($file, false);
100        if ($raw === '') {
101            return [];
102        }
103        $data = json_decode($raw, true);
104        if (!is_array($data) || !isset($data['annotations']) || !is_array($data['annotations'])) {
105            return [];
106        }
107        return $data['annotations'];
108    }
109
110    /**
111     * A single annotation by id.
112     *
113     * @param string $id    page id
114     * @param string $annId annotation id
115     * @return array|null
116     */
117    public function getAnnotation($id, $annId)
118    {
119        foreach ($this->getAnnotations($id) as $a) {
120            if (($a['id'] ?? '') === $annId) {
121                return $a;
122            }
123        }
124        return null;
125    }
126
127    /**
128     * Counts for the on-page indicator. The orphan count is deliberately not
129     * here — it depends on the rendered page and is computed client-side.
130     *
131     * @param string $id page id
132     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
133     */
134    public function getStats($id)
135    {
136        return $this->statsFor($this->getAnnotations($id));
137    }
138
139    /**
140     * Counts for the on-page indicator, computed from an already-loaded list.
141     * Split out from getStats() so callers that already hold the annotation
142     * array (e.g. the page-load JSINFO injector, which embeds the same list)
143     * don't re-read the file.
144     *
145     * @param array $annotations annotation list
146     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
147     */
148    public function statsFor(array $annotations)
149    {
150        $open = 0;
151        $resolved = 0;
152        foreach ($annotations as $a) {
153            if (($a['status'] ?? 'open') === 'resolved') {
154                $resolved++;
155            } else {
156                $open++;
157            }
158        }
159        return ['total' => $open + $resolved, 'open' => $open, 'resolved' => $resolved];
160    }
161
162    /**
163     * Write a page's annotation list to disk.
164     *
165     * @param string $id   page id
166     * @param array  $list annotations
167     * @return bool
168     */
169    protected function writeFile($id, array $list)
170    {
171        $payload = [
172            'version'     => self::SCHEMA_VERSION,
173            'annotations' => array_values($list),
174        ];
175        return (bool) io_saveFile(
176            $this->getFile($id),
177            json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES)
178        );
179    }
180
181    /**
182     * Run a modification against a page's annotations under a write lock.
183     *
184     * The modifier receives the annotation list by reference and returns an
185     * outcome value. Returning the boolean false aborts the write (used for
186     * "target not found"); any other value is returned to the caller after a
187     * successful save.
188     *
189     * @param string   $id       page id
190     * @param callable $modifier function(array &$annotations): mixed
191     * @return mixed  the modifier's outcome on success, or false on failure
192     */
193    protected function mutate($id, callable $modifier)
194    {
195        $file = $this->getFile($id);
196        // Lock on a sentinel key, NOT $file itself: writeFile() below calls
197        // io_saveFile($file), which takes its own io_lock($file) internally.
198        // Locking $file here would collide with that inner lock — io_lock
199        // busy-waits ~3s for the stale-lock timeout on every write and then
200        // proceeds, defeating mutual exclusion (see DokuWiki TaskRunner). A
201        // distinct key serialises the read-modify-write across requests while
202        // leaving io_saveFile's lock uncontended.
203        $lock = $file . '.lock';
204        io_lock($lock);
205
206        $annotations = $this->getAnnotations($id);
207        $outcome = $modifier($annotations);
208
209        if ($outcome === false) {
210            io_unlock($lock);
211            return false;
212        }
213
214        $ok = $this->writeFile($id, $annotations);
215        io_unlock($lock);
216        return $ok ? $outcome : false;
217    }
218
219    // ---------------------------------------------------------------------
220    //  Annotation CRUD
221    // ---------------------------------------------------------------------
222
223    /**
224     * Create an annotation.
225     *
226     * @param string $id     page id
227     * @param array  $anchor raw anchor {exact, prefix, suffix, start}
228     * @param string $author username
229     * @param string $body   annotation text
230     * @return array|false  the created annotation, or false on invalid input
231     */
232    public function createAnnotation($id, $anchor, $author, $body)
233    {
234        if ($id === '' || $author === '' || $author === null) {
235            return false;
236        }
237        $body = $this->cleanBody($body);
238        if ($body === '') {
239            return false;
240        }
241        $anchor = $this->cleanAnchor($anchor);
242        if ($anchor === null) {
243            return false;
244        }
245
246        $now = time();
247        $new = [
248            'id'          => $this->newId(),
249            'anchor'      => $anchor,
250            'author'      => $author,
251            'created'     => $now,
252            'modified'    => $now,
253            'body'        => $body,
254            'status'      => 'open',
255            'resolved_by' => '',
256            'resolved_at' => 0,
257            'replies'     => [],
258        ];
259
260        return $this->mutate($id, function (array &$annotations) use ($new) {
261            $annotations[] = $new;
262            return $new;
263        });
264    }
265
266    /**
267     * Edit an annotation's body text.
268     *
269     * @param string $id    page id
270     * @param string $annId annotation id
271     * @param string $body  new text
272     * @return bool
273     */
274    public function updateAnnotationBody($id, $annId, $body)
275    {
276        $body = $this->cleanBody($body);
277        if ($body === '') {
278            return false;
279        }
280        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $body) {
281            foreach ($annotations as $i => $a) {
282                if (($a['id'] ?? '') === $annId) {
283                    $annotations[$i]['body']     = $body;
284                    $annotations[$i]['modified'] = time();
285                    return true;
286                }
287            }
288            return false;
289        });
290    }
291
292    /**
293     * Delete an annotation and all its replies.
294     *
295     * @param string $id    page id
296     * @param string $annId annotation id
297     * @return bool
298     */
299    public function deleteAnnotation($id, $annId)
300    {
301        return (bool) $this->mutate($id, function (array &$annotations) use ($annId) {
302            foreach ($annotations as $i => $a) {
303                if (($a['id'] ?? '') === $annId) {
304                    array_splice($annotations, $i, 1);
305                    return true;
306                }
307            }
308            return false;
309        });
310    }
311
312    /**
313     * Mark an annotation open or resolved.
314     *
315     * @param string $id     page id
316     * @param string $annId  annotation id
317     * @param string $status 'open' or 'resolved'
318     * @param string $actor  username making the change (recorded when resolving)
319     * @return bool
320     */
321    public function setStatus($id, $annId, $status, $actor)
322    {
323        if (!in_array($status, ['open', 'resolved'], true)) {
324            return false;
325        }
326        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $status, $actor) {
327            foreach ($annotations as $i => $a) {
328                if (($a['id'] ?? '') === $annId) {
329                    $annotations[$i]['status'] = $status;
330                    if ($status === 'resolved') {
331                        $annotations[$i]['resolved_by'] = $actor;
332                        $annotations[$i]['resolved_at'] = time();
333                    } else {
334                        $annotations[$i]['resolved_by'] = '';
335                        $annotations[$i]['resolved_at'] = 0;
336                    }
337                    return true;
338                }
339            }
340            return false;
341        });
342    }
343
344    // ---------------------------------------------------------------------
345    //  Reply CRUD
346    // ---------------------------------------------------------------------
347
348    /**
349     * Add a reply to an annotation.
350     *
351     * @param string $id       page id
352     * @param string $annId    annotation id
353     * @param string $author   username
354     * @param string $body     reply text
355     * @param string $parentId id of the reply being replied to, or '' for root-level
356     * @return array|false  the created reply, or false on invalid input
357     */
358    public function addReply($id, $annId, $author, $body, $parentId = '')
359    {
360        if ($author === '' || $author === null) {
361            return false;
362        }
363        $body = $this->cleanBody($body);
364        if ($body === '') {
365            return false;
366        }
367        $now = time();
368        $reply = [
369            'id'       => $this->newId(),
370            'parentId' => preg_replace('/[^a-f0-9]/', '', (string) $parentId),
371            'author'   => $author,
372            'created'  => $now,
373            'modified' => $now,
374            'body'     => $body,
375        ];
376
377        return $this->mutate($id, function (array &$annotations) use ($annId, $reply) {
378            foreach ($annotations as $i => $a) {
379                if (($a['id'] ?? '') === $annId) {
380                    $annotations[$i]['replies'][] = $reply;
381                    return $reply;
382                }
383            }
384            return false;
385        });
386    }
387
388    /**
389     * Edit a reply's body text.
390     *
391     * @param string $id      page id
392     * @param string $annId   annotation id
393     * @param string $replyId reply id
394     * @param string $body    new text
395     * @return bool
396     */
397    public function updateReply($id, $annId, $replyId, $body)
398    {
399        $body = $this->cleanBody($body);
400        if ($body === '') {
401            return false;
402        }
403        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId, $body) {
404            foreach ($annotations as $i => $a) {
405                if (($a['id'] ?? '') !== $annId) {
406                    continue;
407                }
408                foreach (($a['replies'] ?? []) as $j => $r) {
409                    if (($r['id'] ?? '') === $replyId) {
410                        $annotations[$i]['replies'][$j]['body']     = $body;
411                        $annotations[$i]['replies'][$j]['modified'] = time();
412                        return true;
413                    }
414                }
415            }
416            return false;
417        });
418    }
419
420    /**
421     * Delete a reply.
422     *
423     * @param string $id      page id
424     * @param string $annId   annotation id
425     * @param string $replyId reply id
426     * @return bool
427     */
428    public function deleteReply($id, $annId, $replyId)
429    {
430        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId) {
431            foreach ($annotations as $i => $a) {
432                if (($a['id'] ?? '') !== $annId) {
433                    continue;
434                }
435                foreach (($a['replies'] ?? []) as $j => $r) {
436                    if (($r['id'] ?? '') === $replyId) {
437                        array_splice($annotations[$i]['replies'], $j, 1);
438                        return true;
439                    }
440                }
441            }
442            return false;
443        });
444    }
445
446    // ---------------------------------------------------------------------
447    //  Bulk maintenance (admin, per page)
448    // ---------------------------------------------------------------------
449
450    /**
451     * Remove every resolved annotation from a page.
452     *
453     * @param string $id page id
454     * @return int|false number removed, or false on write failure
455     */
456    public function clearResolved($id)
457    {
458        if (empty($this->getAnnotations($id))) {
459            return 0;
460        }
461        return $this->mutate($id, function (array &$annotations) {
462            $before = count($annotations);
463            $annotations = array_values(array_filter($annotations, function ($a) {
464                return ($a['status'] ?? 'open') !== 'resolved';
465            }));
466            return $before - count($annotations);
467        });
468    }
469
470    /**
471     * Remove every resolved annotation from every annotated page.
472     *
473     * The wiki-wide companion to clearResolved(), mirroring clearOrphanedAll():
474     * iterates the pages found by getAnnotatedPages() and applies the per-page
475     * clearResolved() to each. A page whose write fails is skipped rather than
476     * aborting the whole sweep.
477     *
478     * @return int total number of annotations removed across all pages
479     */
480    public function clearResolvedAll()
481    {
482        $removed = 0;
483        foreach ($this->getAnnotatedPages() as $id) {
484            $n = $this->clearResolved($id);
485            if ($n !== false) {
486                $removed += (int) $n;
487            }
488        }
489        return $removed;
490    }
491
492    /**
493     * Remove every orphaned annotation from a page — those whose quoted text
494     * no longer appears in the rendered page. The page is re-checked here, so
495     * this is authoritative regardless of what a client believed.
496     *
497     * @param string $id page id
498     * @return int|false number removed, or false on write failure
499     */
500    public function clearOrphaned($id)
501    {
502        $orphanIds = [];
503        foreach ($this->findOrphaned($id) as $a) {
504            $orphanIds[] = $a['id'];
505        }
506        if (empty($orphanIds)) {
507            return 0;
508        }
509        return $this->mutate($id, function (array &$annotations) use ($orphanIds) {
510            $before = count($annotations);
511            $annotations = array_values(array_filter($annotations, function ($a) use ($orphanIds) {
512                return !in_array($a['id'] ?? '', $orphanIds, true);
513            }));
514            return $before - count($annotations);
515        });
516    }
517
518    /**
519     * Remove every orphaned annotation from every annotated page.
520     *
521     * Iterates the pages found by getAnnotatedPages() and applies the per-page
522     * clearOrphaned() to each, so the same authoritative re-check runs for
523     * every page. A page whose write fails is skipped rather than aborting the
524     * whole sweep.
525     *
526     * @return int total number of annotations removed across all pages
527     */
528    public function clearOrphanedAll()
529    {
530        $removed = 0;
531        foreach ($this->getAnnotatedPages() as $id) {
532            $n = $this->clearOrphaned($id);
533            if ($n !== false) {
534                $removed += (int) $n;
535            }
536        }
537        return $removed;
538    }
539
540    // ---------------------------------------------------------------------
541    //  Orphan detection
542    // ---------------------------------------------------------------------
543
544    /**
545     * Render a page to normalised plain text, for quote searching.
546     *
547     * Block-level closing tags become spaces so adjacent blocks do not fuse
548     * into one run of text; then tags are stripped, entities decoded, and
549     * whitespace collapsed — the same normalisation applied to stored quotes.
550     *
551     * @param string $id page id
552     * @return string
553     */
554    public function getPageText($id)
555    {
556        if (!page_exists($id)) {
557            return '';
558        }
559        $xhtml = p_wiki_xhtml($id, '', false);
560        if (!is_string($xhtml) || $xhtml === '') {
561            return '';
562        }
563        $xhtml = preg_replace('#</(p|div|li|h[1-6]|td|th|tr|blockquote|pre|dt|dd)>#i', ' ', $xhtml);
564        $xhtml = preg_replace('#<br\s*/?>#i', ' ', $xhtml);
565        $text  = strip_tags($xhtml);
566        $text  = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
567        return $this->normalizeWhitespace($text);
568    }
569
570    /**
571     * The annotations on a page whose quoted text is no longer present.
572     *
573     * @param string $id page id
574     * @return array list of orphaned annotation arrays
575     */
576    public function findOrphaned($id)
577    {
578        $annotations = $this->getAnnotations($id);
579        if (empty($annotations)) {
580            return [];
581        }
582        $pageText = $this->getPageText($id);
583
584        $orphaned = [];
585        foreach ($annotations as $a) {
586            if ($this->quoteMissing($a, $pageText)) {
587                $orphaned[] = $a;
588            }
589        }
590        return $orphaned;
591    }
592
593    /**
594     * Whether an annotation's quoted text is absent from the page text.
595     *
596     * The single orphan rule, shared by findOrphaned() and pageCounts(): an
597     * annotation is orphaned when its (normalised) quoted text no longer
598     * appears in the (normalised) rendered page text.
599     *
600     * @param array  $annotation annotation array
601     * @param string $pageText   normalised plain-text page body (see getPageText)
602     * @return bool
603     */
604    protected function quoteMissing(array $annotation, $pageText)
605    {
606        $exact = $this->normalizeWhitespace($annotation['anchor']['exact'] ?? '');
607        return $exact === '' || mb_strpos($pageText, $exact) === false;
608    }
609
610    // ---------------------------------------------------------------------
611    //  Admin overview (enumeration & counts)
612    // ---------------------------------------------------------------------
613
614    /**
615     * Every page that currently has at least one stored annotation.
616     *
617     * Scans the meta directory for ".annotations" files and maps each back to a
618     * page id. Files left behind with an empty annotation list (every
619     * annotation since deleted) are skipped, so the result matches what the
620     * admin overview shows.
621     *
622     * @return string[] page ids, in the natural order search() yields
623     */
624    public function getAnnotatedPages()
625    {
626        global $conf;
627        $dir = $conf['metadir'];
628        if (!is_dir($dir)) {
629            return [];
630        }
631        $found = [];
632        search($found, $dir, [$this, 'searchAnnotations'], []);
633
634        $ids = [];
635        foreach ($found as $item) {
636            if (!empty($this->getAnnotations($item['id']))) {
637                $ids[] = $item['id'];
638            }
639        }
640        return $ids;
641    }
642
643    /**
644     * search() callback collecting ".annotations" files as page ids.
645     *
646     * Directories are always traversed; files are matched on the extension and
647     * the derived id is validated with cleanID() so anything that is not a real
648     * page id is skipped.
649     *
650     * @param array  $data &$ result accumulator (each entry ['id' => string])
651     * @param string $base search root (the meta directory)
652     * @param string $file current path relative to $base (leading slash)
653     * @param string $type 'd' for directory, 'f' for file
654     * @param int    $lvl  recursion depth
655     * @param array  $opts options passed to search() (unused)
656     * @return bool whether search() should recurse into a directory
657     */
658    public function searchAnnotations(&$data, $base, $file, $type, $lvl, $opts)
659    {
660        if ($type === 'd') {
661            return true; // recurse into namespaces
662        }
663        if (!str_ends_with($file, '.annotations')) {
664            return false;
665        }
666        $id = pathID(substr($file, 0, -strlen('.annotations')));
667        if ($id === '' || $id !== cleanID($id)) {
668            return false;
669        }
670        $data[] = ['id' => $id];
671        return false;
672    }
673
674    /**
675     * Annotation counts for one page, for the admin overview.
676     *
677     * The page is rendered once (getPageText) and every annotation tested with
678     * the shared quoteMissing() rule, so "normal" here means exactly "not
679     * orphaned" — the same definition the per-page clear-orphaned uses.
680     *
681     * "resolved" counts every annotation whose status is resolved, regardless of
682     * whether it is also orphaned, so it matches exactly what clearResolved()
683     * removes. The facets therefore overlap: a resolved-and-present annotation is
684     * counted in both "normal" and "resolved"; a resolved-and-orphaned one in
685     * both "orphaned" and "resolved".
686     *
687     * @param string $id page id
688     * @return array ['total'=>int, 'normal'=>int, 'resolved'=>int, 'orphaned'=>int]
689     */
690    public function pageCounts($id)
691    {
692        $annotations = $this->getAnnotations($id);
693        $total = count($annotations);
694        if ($total === 0) {
695            return ['total' => 0, 'normal' => 0, 'resolved' => 0, 'orphaned' => 0];
696        }
697        $pageText = $this->getPageText($id);
698        $orphaned = 0;
699        $resolved = 0;
700        foreach ($annotations as $a) {
701            if ($this->quoteMissing($a, $pageText)) {
702                $orphaned++;
703            }
704            if (($a['status'] ?? 'open') === 'resolved') {
705                $resolved++;
706            }
707        }
708        return [
709            'total'    => $total,
710            'normal'   => $total - $orphaned,
711            'resolved' => $resolved,
712            'orphaned' => $orphaned,
713        ];
714    }
715
716    // ---------------------------------------------------------------------
717    //  Permission rules (single source of truth)
718    // ---------------------------------------------------------------------
719
720    /**
721     * May this user create an annotation, reply, or change a resolve status?
722     *
723     * Requires only read access to the page — annotations are out-of-band, so
724     * a user whose page edit access is blocked may still annotate.
725     *
726     * @param string $user     current username ('' for anonymous)
727     * @param int    $aclLevel the user's ACL level on the page
728     * @return bool
729     */
730    public function canAnnotate($user, $aclLevel)
731    {
732        return $user !== '' && $user !== null && $aclLevel >= AUTH_READ;
733    }
734
735    /**
736     * May this user edit or delete the given annotation? Author or admin.
737     *
738     * @param array  $annotation
739     * @param string $user
740     * @param bool   $isAdmin
741     * @return bool
742     */
743    public function canEditAnnotation(array $annotation, $user, $isAdmin)
744    {
745        if ($user === '' || $user === null) {
746            return false;
747        }
748        return $isAdmin || (($annotation['author'] ?? '') === $user);
749    }
750
751    /**
752     * May this user edit or delete the given reply? Author or admin.
753     *
754     * @param array  $reply
755     * @param string $user
756     * @param bool   $isAdmin
757     * @return bool
758     */
759    public function canEditReply(array $reply, $user, $isAdmin)
760    {
761        if ($user === '' || $user === null) {
762            return false;
763        }
764        return $isAdmin || (($reply['author'] ?? '') === $user);
765    }
766
767    /**
768     * May this user run the per-page "clear resolved/orphaned" operations?
769     * Admins only.
770     *
771     * @param bool $isAdmin
772     * @return bool
773     */
774    public function canClear($isAdmin)
775    {
776        return (bool) $isAdmin;
777    }
778
779    // ---------------------------------------------------------------------
780    //  Input cleaning
781    // ---------------------------------------------------------------------
782
783    /**
784     * Validate and normalise a raw anchor.
785     *
786     * @param mixed $anchor
787     * @return array|null  the cleaned anchor, or null if unusable
788     */
789    protected function cleanAnchor($anchor)
790    {
791        if (!is_array($anchor)) {
792            return null;
793        }
794
795        $exact = (isset($anchor['exact']) && is_string($anchor['exact']))
796            ? $this->normalizeWhitespace($anchor['exact'])
797            : '';
798        if ($exact === '') {
799            return null; // an anchor without quoted text is unusable
800        }
801        if (mb_strlen($exact) > self::MAX_QUOTE) {
802            $exact = mb_substr($exact, 0, self::MAX_QUOTE);
803        }
804
805        $prefix = (isset($anchor['prefix']) && is_string($anchor['prefix']))
806            ? $this->normalizeWhitespace($anchor['prefix'])
807            : '';
808        $suffix = (isset($anchor['suffix']) && is_string($anchor['suffix']))
809            ? $this->normalizeWhitespace($anchor['suffix'])
810            : '';
811        $ctx = $this->contextLength();
812        if (mb_strlen($prefix) > $ctx) {
813            $prefix = mb_substr($prefix, -$ctx);
814        }
815        if (mb_strlen($suffix) > $ctx) {
816            $suffix = mb_substr($suffix, 0, $ctx);
817        }
818
819        $start = isset($anchor['start']) ? max(0, (int) $anchor['start']) : 0;
820
821        return [
822            'exact'  => $exact,
823            'prefix' => $prefix,
824            'suffix' => $suffix,
825            'start'  => $start,
826        ];
827    }
828
829    /**
830     * Clean an annotation/reply body: a plain-text string, trimmed, with
831     * normalised line endings and a length cap. Newlines are kept; the text
832     * is escaped by the consumer at render time.
833     *
834     * @param mixed $body
835     * @return string
836     */
837    protected function cleanBody($body)
838    {
839        if (!is_string($body)) {
840            return '';
841        }
842        $body = str_replace("\r\n", "\n", $body);
843        $body = str_replace("\r", "\n", $body);
844        $body = trim($body);
845        $cap = $this->bodyCap();
846        if (mb_strlen($body) > $cap) {
847            $body = mb_substr($body, 0, $cap);
848        }
849        return $body;
850    }
851
852    /**
853     * Collapse every run of whitespace to a single space and trim.
854     *
855     * @param mixed $text
856     * @return string
857     */
858    protected function normalizeWhitespace($text)
859    {
860        return trim(preg_replace('/\s+/u', ' ', (string) $text));
861    }
862
863    /**
864     * A fresh identifier for an annotation or reply.
865     *
866     * @return string 16 hex characters
867     */
868    protected function newId()
869    {
870        return bin2hex(random_bytes(8));
871    }
872}
873