<?php

/**
 * Annotations plugin — storage and data-logic helper.
 *
 * This component owns:
 *
 *  1. The per-page annotation store. One JSON file per page, obtained via
 *     metaFN($id, '.annotations'), holding {version, annotations:[...]}.
 *     JSON and pretty-printed so the files are easy to inspect or back up.
 *     The page text and the wiki changelog are never touched.
 *
 *  2. The text-quote anchor model. Each annotation stores an anchor of
 *     {exact, prefix, suffix, start} — the quoted text, a short slice of the
 *     surrounding context on each side (to disambiguate repeated quotes),
 *     and a character-offset hint. This is the Hypothes.is approach.
 *
 *  3. CRUD on annotations and their threaded replies.
 *
 *  4. Server-side orphan detection: a page is rendered to plain text and an
 *     annotation is "orphaned" when its quoted text no longer appears. Used
 *     by the admin-only per-page "clear orphaned" operation. (The live UI
 *     also detects orphans client-side for the on-page counter.)
 *
 *  5. The permission rules, as the single source of truth. They are pure
 *     functions: the caller gathers the facts (current user, admin flag, the
 *     page's ACL level) and passes them in. Because annotations live
 *     out-of-band, creating one needs only AUTH_READ on the page, never
 *     AUTH_EDIT — so a group whose page edit access is blocked can still
 *     annotate.
 */

// must be run within DokuWiki
if (!defined('DOKU_INC')) die();

class helper_plugin_annotations extends DokuWiki_Plugin
{
    /** storage schema version, written into each file */
    const SCHEMA_VERSION = 1;

    /** longest quoted selection stored, in characters */
    const MAX_QUOTE = 1000;

    /** length of the prefix/suffix context slices, in characters (config fallback) */
    const DEFAULT_CONTEXT = 64;

    /** longest annotation/reply body, in characters (config fallback) */
    const DEFAULT_BODY = 10000;

    /**
     * Configured length of each prefix/suffix context slice, in characters.
     *
     * Any non-negative configured value is used as-is (0 therefore means
     * "store no context"); only a negative value falls back to
     * DEFAULT_CONTEXT.
     *
     * @return int
     */
    protected function contextLength()
    {
        $v = (int) $this->getConf('context_length');
        return $v >= 0 ? $v : self::DEFAULT_CONTEXT;
    }

    /**
     * Configured maximum annotation/reply body length, in characters.
     *
     * A configured value that is zero or negative falls back to DEFAULT_BODY.
     *
     * @return int
     */
    protected function bodyCap()
    {
        $v = (int) $this->getConf('body_cap');
        return $v > 0 ? $v : self::DEFAULT_BODY;
    }

    // ---------------------------------------------------------------------
    // Storage
    // ---------------------------------------------------------------------

    /**
     * Path of a page's annotation file.
     *
     * @param string $id page id
     * @return string
     */
    protected function getFile($id)
    {
        return metaFN($id, '.annotations');
    }

    /**
     * All annotations stored for a page.
     *
     * A missing file, an empty file, undecodable JSON, or a document without
     * an 'annotations' array all yield an empty list.
     *
     * @param string $id page id
     * @return array list of annotation arrays (empty if none)
     */
    public function getAnnotations($id)
    {
        $file = $this->getFile($id);
        if (!file_exists($file)) {
            return [];
        }
        $raw = io_readFile($file, false);
        if ($raw === '') {
            return [];
        }
        $data = json_decode($raw, true);
        if (!is_array($data) || !isset($data['annotations']) || !is_array($data['annotations'])) {
            return [];
        }
        return $data['annotations'];
    }

    /**
     * A single annotation by id.
     *
     * @param string $id    page id
     * @param string $annId annotation id
     * @return array|null the annotation, or null when no annotation has that id
     */
    public function getAnnotation($id, $annId)
    {
        foreach ($this->getAnnotations($id) as $a) {
            if (($a['id'] ?? '') === $annId) {
                return $a;
            }
        }
        return null;
    }

    /**
     * Counts for the on-page indicator. The orphan count is deliberately not
     * here — it depends on the rendered page and is computed client-side.
     *
     * @param string $id page id
     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
     */
    public function getStats($id)
    {
        return $this->statsFor($this->getAnnotations($id));
    }

    /**
     * Counts for the on-page indicator, computed from an already-loaded list.
     * Split out from getStats() so callers that already hold the annotation
     * array (e.g. the page-load JSINFO injector, which embeds the same list)
     * don't re-read the file.
     *
     * An annotation without a 'status' key, or with any status other than
     * 'resolved', counts as open.
     *
     * @param array $annotations annotation list
     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
     */
    public function statsFor(array $annotations)
    {
        $open = 0;
        $resolved = 0;
        foreach ($annotations as $a) {
            if (($a['status'] ?? 'open') === 'resolved') {
                $resolved++;
            } else {
                $open++;
            }
        }
        return ['total' => $open + $resolved, 'open' => $open, 'resolved' => $resolved];
    }

    /**
     * Write a page's annotation list to disk.
     *
     * The list is re-indexed with array_values() so it is always stored as a
     * JSON array. If the payload cannot be encoded (json_encode() returns
     * false, e.g. for a string that is not valid UTF-8) nothing is written
     * and false is returned, so the existing file is left untouched.
     *
     * @param string $id   page id
     * @param array  $list annotations
     * @return bool true when the file was saved, false otherwise
     */
    protected function writeFile($id, array $list)
    {
        $payload = [
            'version'     => self::SCHEMA_VERSION,
            'annotations' => array_values($list),
        ];
        $json = json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
        if ($json === false) {
            // Never hand a non-string payload to io_saveFile(): that could
            // replace the page's whole store with an empty file.
            return false;
        }
        return (bool) io_saveFile($this->getFile($id), $json);
    }

    /**
     * Run a modification against a page's annotations under a write lock.
     *
     * The modifier receives the annotation list by reference and returns an
     * outcome value. Returning the boolean false aborts the write (used for
     * "target not found"); any other value is returned to the caller after a
     * successful save.
     *
     * The lock is released in a finally block, so it is given back even when
     * the modifier or the write throws.
     *
     * @param string   $id       page id
     * @param callable $modifier function(array &$annotations): mixed
     * @return mixed the modifier's outcome on success, or false on failure
     */
    protected function mutate($id, callable $modifier)
    {
        $file = $this->getFile($id);
        // Lock on a sentinel key, NOT $file itself: writeFile() below calls
        // io_saveFile($file), which takes its own io_lock($file) internally.
        // Locking $file here would collide with that inner lock — io_lock
        // busy-waits ~3s for the stale-lock timeout on every write and then
        // proceeds, defeating mutual exclusion (see DokuWiki TaskRunner). A
        // distinct key serialises the read-modify-write across requests while
        // leaving io_saveFile's lock uncontended.
        $lock = $file . '.lock';
        io_lock($lock);

        try {
            $annotations = $this->getAnnotations($id);
            $outcome = $modifier($annotations);

            if ($outcome === false) {
                return false;
            }

            return $this->writeFile($id, $annotations) ? $outcome : false;
        } finally {
            io_unlock($lock);
        }
    }

    // ---------------------------------------------------------------------
    // Annotation CRUD
    // ---------------------------------------------------------------------

    /**
     * Create an annotation.
     *
     * @param string $id     page id
     * @param array  $anchor raw anchor {exact, prefix, suffix, start}
     * @param string $author username
     * @param string $body   annotation text
     * @return array|false the created annotation, or false on invalid input
     *                     or write failure
     */
    public function createAnnotation($id, $anchor, $author, $body)
    {
        if ($id === '' || $author === '' || $author === null) {
            return false;
        }
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        $anchor = $this->cleanAnchor($anchor);
        if ($anchor === null) {
            return false;
        }

        $now = time();
        $new = [
            'id'          => $this->newId(),
            'anchor'      => $anchor,
            'author'      => $author,
            'created'     => $now,
            'modified'    => $now,
            'body'        => $body,
            'status'      => 'open',
            'resolved_by' => '',
            'resolved_at' => 0,
            'replies'     => [],
        ];

        return $this->mutate($id, function (array &$annotations) use ($new) {
            $annotations[] = $new;
            return $new;
        });
    }

    /**
     * Edit an annotation's body text.
     *
     * @param string $id    page id
     * @param string $annId annotation id
     * @param string $body  new text
     * @return bool false when the body is empty after cleaning, the
     *              annotation is not found, or the write fails
     */
    public function updateAnnotationBody($id, $annId, $body)
    {
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $body) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    $annotations[$i]['body'] = $body;
                    $annotations[$i]['modified'] = time();
                    return true;
                }
            }
            return false;
        });
    }

    /**
     * Delete an annotation and all its replies.
     *
     * @param string $id    page id
     * @param string $annId annotation id
     * @return bool false when the annotation is not found or the write fails
     */
    public function deleteAnnotation($id, $annId)
    {
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    array_splice($annotations, $i, 1);
                    return true;
                }
            }
            return false;
        });
    }

    /**
     * Mark an annotation open or resolved.
     *
     * Resolving records the actor and the current time; re-opening clears
     * both fields again.
     *
     * @param string $id     page id
     * @param string $annId  annotation id
     * @param string $status 'open' or 'resolved'
     * @param string $actor  username making the change (recorded when resolving)
     * @return bool false on an unknown status, a missing annotation, or a
     *              write failure
     */
    public function setStatus($id, $annId, $status, $actor)
    {
        if (!in_array($status, ['open', 'resolved'], true)) {
            return false;
        }
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $status, $actor) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    $annotations[$i]['status'] = $status;
                    if ($status === 'resolved') {
                        $annotations[$i]['resolved_by'] = $actor;
                        $annotations[$i]['resolved_at'] = time();
                    } else {
                        $annotations[$i]['resolved_by'] = '';
                        $annotations[$i]['resolved_at'] = 0;
                    }
                    return true;
                }
            }
            return false;
        });
    }

    // ---------------------------------------------------------------------
    // Reply CRUD
    // ---------------------------------------------------------------------

    /**
     * Add a reply to an annotation.
     *
     * The parent id is reduced to lowercase hex characters before storing;
     * it is not checked against the annotation's existing replies.
     *
     * @param string $id       page id
     * @param string $annId    annotation id
     * @param string $author   username
     * @param string $body     reply text
     * @param string $parentId id of the reply being replied to, or '' for root-level
     * @return array|false the created reply, or false on invalid input, a
     *                     missing annotation, or a write failure
     */
    public function addReply($id, $annId, $author, $body, $parentId = '')
    {
        if ($author === '' || $author === null) {
            return false;
        }
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        $now = time();
        $reply = [
            'id'       => $this->newId(),
            'parentId' => preg_replace('/[^a-f0-9]/', '', (string) $parentId),
            'author'   => $author,
            'created'  => $now,
            'modified' => $now,
            'body'     => $body,
        ];

        return $this->mutate($id, function (array &$annotations) use ($annId, $reply) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    $annotations[$i]['replies'][] = $reply;
                    return $reply;
                }
            }
            return false;
        });
    }

    /**
     * Edit a reply's body text.
     *
     * @param string $id      page id
     * @param string $annId   annotation id
     * @param string $replyId reply id
     * @param string $body    new text
     * @return bool false when the body is empty after cleaning, the target
     *              is not found, or the write fails
     */
    public function updateReply($id, $annId, $replyId, $body)
    {
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId, $body) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') !== $annId) {
                    continue;
                }
                foreach (($a['replies'] ?? []) as $j => $r) {
                    if (($r['id'] ?? '') === $replyId) {
                        $annotations[$i]['replies'][$j]['body'] = $body;
                        $annotations[$i]['replies'][$j]['modified'] = time();
                        return true;
                    }
                }
            }
            return false;
        });
    }

    /**
     * Delete a reply.
     *
     * Only the reply itself is removed; replies that name it as their
     * parentId are left in place.
     *
     * @param string $id      page id
     * @param string $annId   annotation id
     * @param string $replyId reply id
     * @return bool false when the target is not found or the write fails
     */
    public function deleteReply($id, $annId, $replyId)
    {
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') !== $annId) {
                    continue;
                }
                foreach (($a['replies'] ?? []) as $j => $r) {
                    if (($r['id'] ?? '') === $replyId) {
                        array_splice($annotations[$i]['replies'], $j, 1);
                        return true;
                    }
                }
            }
            return false;
        });
    }

    // ---------------------------------------------------------------------
    // Bulk maintenance (admin, per page)
    // ---------------------------------------------------------------------

    /**
     * Remove every resolved annotation from a page.
     *
     * @param string $id page id
     * @return int|false number removed, or false on write failure
     */
    public function clearResolved($id)
    {
        // Nothing stored: report zero without taking the lock or writing.
        if (empty($this->getAnnotations($id))) {
            return 0;
        }
        return $this->mutate($id, function (array &$annotations) {
            $before = count($annotations);
            $annotations = array_values(array_filter($annotations, function ($a) {
                return ($a['status'] ?? 'open') !== 'resolved';
            }));
            return $before - count($annotations);
        });
    }

    /**
     * Remove every orphaned annotation from a page — those whose quoted text
     * no longer appears in the rendered page. The page is re-checked here, so
     * this is authoritative regardless of what a client believed.
     *
     * Orphans are collected by id before the lock is taken and removed by id
     * under the lock. An orphan without a usable (non-empty) id is skipped:
     * matching on an empty id would also delete every other id-less entry,
     * orphaned or not.
     *
     * @param string $id page id
     * @return int|false number removed, or false on write failure
     */
    public function clearOrphaned($id)
    {
        $orphanIds = [];
        foreach ($this->findOrphaned($id) as $a) {
            if (isset($a['id']) && $a['id'] !== '') {
                $orphanIds[] = $a['id'];
            }
        }
        if (empty($orphanIds)) {
            return 0;
        }
        return $this->mutate($id, function (array &$annotations) use ($orphanIds) {
            $before = count($annotations);
            $annotations = array_values(array_filter($annotations, function ($a) use ($orphanIds) {
                return !in_array($a['id'] ?? '', $orphanIds, true);
            }));
            return $before - count($annotations);
        });
    }

    // ---------------------------------------------------------------------
    // Orphan detection
    // ---------------------------------------------------------------------

    /**
     * Render a page to normalised plain text, for quote searching.
     *
     * Block-level closing tags become spaces so adjacent blocks do not fuse
     * into one run of text; then tags are stripped, entities decoded, and
     * whitespace collapsed — the same normalisation applied to stored quotes.
     *
     * @param string $id page id
     * @return string '' when the page does not exist or renders to nothing
     */
    public function getPageText($id)
    {
        if (!page_exists($id)) {
            return '';
        }
        $xhtml = p_wiki_xhtml($id, '', false);
        if (!is_string($xhtml) || $xhtml === '') {
            return '';
        }
        $xhtml = preg_replace('#</(p|div|li|h[1-6]|td|th|tr|blockquote|pre|dt|dd)>#i', ' ', $xhtml);
        $xhtml = preg_replace('#<br\s*/?>#i', ' ', $xhtml);
        $text = strip_tags($xhtml);
        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        return $this->normalizeWhitespace($text);
    }

    /**
     * The annotations on a page whose quoted text is no longer present.
     *
     * An annotation with an empty (or missing) quote is always reported as
     * orphaned. Only the quote itself is searched for; prefix, suffix and
     * start are not consulted here.
     *
     * @param string $id page id
     * @return array list of orphaned annotation arrays
     */
    public function findOrphaned($id)
    {
        $annotations = $this->getAnnotations($id);
        if (empty($annotations)) {
            return [];
        }
        $pageText = $this->getPageText($id);

        $orphaned = [];
        foreach ($annotations as $a) {
            $exact = $this->normalizeWhitespace($a['anchor']['exact'] ?? '');
            if ($exact === '' || mb_strpos($pageText, $exact) === false) {
                $orphaned[] = $a;
            }
        }
        return $orphaned;
    }

    // ---------------------------------------------------------------------
    // Permission rules (single source of truth)
    // ---------------------------------------------------------------------

    /**
     * May this user create an annotation, reply, or change a resolve status?
     *
     * Requires only read access to the page — annotations are out-of-band, so
     * a user whose page edit access is blocked may still annotate.
     *
     * @param string $user     current username ('' for anonymous)
     * @param int    $aclLevel the user's ACL level on the page
     * @return bool
     */
    public function canAnnotate($user, $aclLevel)
    {
        return $user !== '' && $user !== null && $aclLevel >= AUTH_READ;
    }

    /**
     * May this user edit or delete the given annotation? Author or admin.
     *
     * @param array  $annotation
     * @param string $user
     * @param bool   $isAdmin
     * @return bool
     */
    public function canEditAnnotation(array $annotation, $user, $isAdmin)
    {
        if ($user === '' || $user === null) {
            return false;
        }
        return $isAdmin || (($annotation['author'] ?? '') === $user);
    }

    /**
     * May this user edit or delete the given reply? Author or admin.
     *
     * @param array  $reply
     * @param string $user
     * @param bool   $isAdmin
     * @return bool
     */
    public function canEditReply(array $reply, $user, $isAdmin)
    {
        if ($user === '' || $user === null) {
            return false;
        }
        return $isAdmin || (($reply['author'] ?? '') === $user);
    }

    /**
     * May this user run the per-page "clear resolved/orphaned" operations?
     * Admins only.
     *
     * @param bool $isAdmin
     * @return bool
     */
    public function canClear($isAdmin)
    {
        return (bool) $isAdmin;
    }

    // ---------------------------------------------------------------------
    // Input cleaning
    // ---------------------------------------------------------------------

    /**
     * Validate and normalise a raw anchor.
     *
     * The quote is whitespace-normalised and capped at MAX_QUOTE characters.
     * The prefix keeps its LAST contextLength() characters (the text nearest
     * the quote) and the suffix its FIRST contextLength() characters. The
     * start hint is cast to int and clamped to be non-negative.
     *
     * @param mixed $anchor
     * @return array|null the cleaned anchor, or null if unusable
     */
    protected function cleanAnchor($anchor)
    {
        if (!is_array($anchor)) {
            return null;
        }

        $exact = (isset($anchor['exact']) && is_string($anchor['exact']))
            ? $this->normalizeWhitespace($anchor['exact'])
            : '';
        if ($exact === '') {
            return null; // an anchor without quoted text is unusable
        }
        if (mb_strlen($exact) > self::MAX_QUOTE) {
            $exact = mb_substr($exact, 0, self::MAX_QUOTE);
        }

        $prefix = (isset($anchor['prefix']) && is_string($anchor['prefix']))
            ? $this->normalizeWhitespace($anchor['prefix'])
            : '';
        $suffix = (isset($anchor['suffix']) && is_string($anchor['suffix']))
            ? $this->normalizeWhitespace($anchor['suffix'])
            : '';
        $ctx = $this->contextLength();
        $prefixLength = mb_strlen($prefix);
        if ($prefixLength > $ctx) {
            // Use an explicit start offset rather than mb_substr($prefix, -$ctx):
            // with $ctx === 0 the negative form is offset 0 and would keep the
            // whole prefix instead of none of it.
            $prefix = mb_substr($prefix, $prefixLength - $ctx);
        }
        if (mb_strlen($suffix) > $ctx) {
            $suffix = mb_substr($suffix, 0, $ctx);
        }

        $start = isset($anchor['start']) ? max(0, (int) $anchor['start']) : 0;

        return [
            'exact'  => $exact,
            'prefix' => $prefix,
            'suffix' => $suffix,
            'start'  => $start,
        ];
    }

    /**
     * Clean an annotation/reply body: a plain-text string, trimmed, with
     * normalised line endings and a length cap. Newlines are kept; the text
     * is escaped by the consumer at render time.
     *
     * @param mixed $body
     * @return string the cleaned body; '' for non-string input
     */
    protected function cleanBody($body)
    {
        if (!is_string($body)) {
            return '';
        }
        $body = str_replace("\r\n", "\n", $body);
        $body = str_replace("\r", "\n", $body);
        $body = trim($body);
        $cap = $this->bodyCap();
        if (mb_strlen($body) > $cap) {
            $body = mb_substr($body, 0, $cap);
        }
        return $body;
    }

    /**
     * Collapse every run of whitespace to a single space and trim.
     *
     * preg_replace() with the /u modifier returns null when the subject is
     * not valid UTF-8. In that case the text is collapsed byte-wise on ASCII
     * whitespace instead, so malformed input is not silently turned into an
     * empty string.
     *
     * @param mixed $text
     * @return string
     */
    protected function normalizeWhitespace($text)
    {
        $text = (string) $text;
        $collapsed = preg_replace('/\s+/u', ' ', $text);
        if ($collapsed === null) {
            $collapsed = preg_replace('/[ \t\n\r\f\x0B]+/', ' ', $text);
        }
        return trim((string) $collapsed);
    }

    /**
     * A fresh identifier for an annotation or reply.
     *
     * @return string 16 hex characters
     */
    protected function newId()
    {
        return bin2hex(random_bytes(8));
    }
}