<?php

/**
 * Annotations plugin — storage and data-logic helper.
 *
 * This component owns:
 *
 *  1. The per-page annotation store. One JSON file per page, obtained via
 *     metaFN($id, '.annotations'), holding {version, annotations:[...]}.
 *     JSON and pretty-printed so the files are easy to inspect or back up.
 *     The page text and the wiki changelog are never touched.
 *
 *  2. The text-quote anchor model. Each annotation stores an anchor of
 *     {exact, prefix, suffix, start} — the quoted text, a short slice of the
 *     surrounding context on each side (to disambiguate repeated quotes),
 *     and a character-offset hint. This is the Hypothes.is approach.
 *
 *  3. CRUD on annotations and their threaded replies.
 *
 *  4. Server-side orphan detection: a page is rendered to plain text and an
 *     annotation is "orphaned" when its quoted text no longer appears. Used
 *     by the admin-only per-page "clear orphaned" operation. (The live UI
 *     also detects orphans client-side for the on-page counter.)
 *
 *  5. The permission rules, as the single source of truth. They are pure
 *     functions: the caller gathers the facts (current user, admin flag, the
 *     page's ACL level) and passes them in. Because annotations live
 *     out-of-band, creating one needs only AUTH_READ on the page, never
 *     AUTH_EDIT — so a group whose page edit access is blocked can still
 *     annotate.
 */

// must be run within DokuWiki
if (!defined('DOKU_INC')) die();

class helper_plugin_annotations extends DokuWiki_Plugin
{
    /** storage schema version, written into each file */
    const SCHEMA_VERSION = 1;

    /** longest quoted selection stored, in characters */
    const MAX_QUOTE = 1000;

    /** length of the prefix/suffix context slices, in characters */
    const MAX_CONTEXT = 64;

    /** longest annotation/reply body, in characters */
    const MAX_BODY = 10000;

    // ---------------------------------------------------------------------
    // Storage
    // ---------------------------------------------------------------------

    /**
     * Path of a page's annotation file.
     *
     * @param string $id page id
     * @return string
     */
    protected function getFile($id)
    {
        return metaFN($id, '.annotations');
    }

    /**
     * All annotations stored for a page.
     *
     * A missing file, an empty file, or a file that does not decode to the
     * expected {annotations: [...]} shape all yield an empty list.
     *
     * @param string $id page id
     * @return array list of annotation arrays (empty if none)
     */
    public function getAnnotations($id)
    {
        $file = $this->getFile($id);
        if (!file_exists($file)) {
            return [];
        }
        $raw = io_readFile($file, false);
        if ($raw === '') {
            return [];
        }
        $data = json_decode($raw, true);
        if (!is_array($data) || !isset($data['annotations']) || !is_array($data['annotations'])) {
            return [];
        }
        return $data['annotations'];
    }

    /**
     * A single annotation by id.
     *
     * @param string $id    page id
     * @param string $annId annotation id
     * @return array|null the annotation, or null when no entry has that id
     */
    public function getAnnotation($id, $annId)
    {
        foreach ($this->getAnnotations($id) as $a) {
            if (($a['id'] ?? '') === $annId) {
                return $a;
            }
        }
        return null;
    }

    /**
     * Counts for the on-page indicator. The orphan count is deliberately not
     * here — it depends on the rendered page and is computed client-side.
     *
     * @param string $id page id
     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
     */
    public function getStats($id)
    {
        return $this->statsFor($this->getAnnotations($id));
    }

    /**
     * Counts for the on-page indicator, computed from an already-loaded list.
     * Split out from getStats() so callers that already hold the annotation
     * array (e.g. the page-load JSINFO injector, which embeds the same list)
     * don't re-read the file.
     *
     * An entry without a 'status' key counts as open.
     *
     * @param array $annotations annotation list
     * @return array ['total'=>int, 'open'=>int, 'resolved'=>int]
     */
    public function statsFor(array $annotations)
    {
        $open = 0;
        $resolved = 0;
        foreach ($annotations as $a) {
            if (($a['status'] ?? 'open') === 'resolved') {
                $resolved++;
            } else {
                $open++;
            }
        }
        return ['total' => $open + $resolved, 'open' => $open, 'resolved' => $resolved];
    }

    /**
     * Write a page's annotation list to disk.
     *
     * The list is re-indexed so it always serialises as a JSON array. If the
     * payload cannot be encoded the existing file is left untouched.
     *
     * @param string $id   page id
     * @param array  $list annotations
     * @return bool true when the file was saved, false on encode or save failure
     */
    protected function writeFile($id, array $list)
    {
        $payload = [
            'version' => self::SCHEMA_VERSION,
            'annotations' => array_values($list),
        ];
        $json = json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
        if ($json === false) {
            // json_encode() fails on e.g. malformed UTF-8 in a body or author.
            // Handing that false on to io_saveFile() would presumably replace
            // the page's whole store with an empty file, so abort instead.
            return false;
        }
        return (bool) io_saveFile($this->getFile($id), $json);
    }

    /**
     * Run a modification against a page's annotations under a write lock.
     *
     * The modifier receives the annotation list by reference and returns an
     * outcome value. Returning the boolean false aborts the write (used for
     * "target not found"); any other value is returned to the caller after a
     * successful save.
     *
     * The lock is released on every exit path, including when the modifier
     * or the write throws.
     *
     * @param string   $id       page id
     * @param callable $modifier function(array &$annotations): mixed
     * @return mixed the modifier's outcome on success, or false on failure
     */
    protected function mutate($id, callable $modifier)
    {
        $file = $this->getFile($id);
        io_lock($file);

        try {
            // Re-read inside the lock so the modifier sees the current state.
            $annotations = $this->getAnnotations($id);
            $outcome = $modifier($annotations);

            if ($outcome === false) {
                return false;
            }

            return $this->writeFile($id, $annotations) ? $outcome : false;
        } finally {
            io_unlock($file);
        }
    }

    // ---------------------------------------------------------------------
    // Annotation CRUD
    // ---------------------------------------------------------------------

    /**
     * Create an annotation.
     *
     * @param string $id     page id
     * @param array  $anchor raw anchor {exact, prefix, suffix, start}
     * @param string $author username
     * @param string $body   annotation text
     * @return array|false the created annotation, or false on invalid input
     *                     or write failure
     */
    public function createAnnotation($id, $anchor, $author, $body)
    {
        if ($id === '' || $author === '' || $author === null) {
            return false;
        }
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        $anchor = $this->cleanAnchor($anchor);
        if ($anchor === null) {
            return false;
        }

        $now = time();
        $new = [
            'id' => $this->newId(),
            'anchor' => $anchor,
            'author' => $author,
            'created' => $now,
            'modified' => $now,
            'body' => $body,
            'status' => 'open',
            'resolved_by' => '',
            'resolved_at' => 0,
            'replies' => [],
        ];

        return $this->mutate($id, function (array &$annotations) use ($new) {
            $annotations[] = $new;
            return $new;
        });
    }

    /**
     * Edit an annotation's body text.
     *
     * @param string $id    page id
     * @param string $annId annotation id
     * @param string $body  new text
     * @return bool false when the body is empty, the annotation is not found,
     *              or the write fails
     */
    public function updateAnnotationBody($id, $annId, $body)
    {
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $body) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    $annotations[$i]['body'] = $body;
                    $annotations[$i]['modified'] = time();
                    return true;
                }
            }
            return false;
        });
    }

    /**
     * Delete an annotation and all its replies.
     *
     * @param string $id    page id
     * @param string $annId annotation id
     * @return bool false when the annotation is not found or the write fails
     */
    public function deleteAnnotation($id, $annId)
    {
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    // Safe to splice mid-loop: we return immediately afterwards.
                    array_splice($annotations, $i, 1);
                    return true;
                }
            }
            return false;
        });
    }

    /**
     * Mark an annotation open or resolved.
     *
     * Resolving records who did it and when; re-opening clears both fields.
     *
     * @param string $id     page id
     * @param string $annId  annotation id
     * @param string $status 'open' or 'resolved'
     * @param string $actor  username making the change (recorded when resolving)
     * @return bool false on an unknown status, a missing annotation, or a
     *              write failure
     */
    public function setStatus($id, $annId, $status, $actor)
    {
        if (!in_array($status, ['open', 'resolved'], true)) {
            return false;
        }
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $status, $actor) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    $annotations[$i]['status'] = $status;
                    if ($status === 'resolved') {
                        $annotations[$i]['resolved_by'] = $actor;
                        $annotations[$i]['resolved_at'] = time();
                    } else {
                        $annotations[$i]['resolved_by'] = '';
                        $annotations[$i]['resolved_at'] = 0;
                    }
                    return true;
                }
            }
            return false;
        });
    }

    // ---------------------------------------------------------------------
    // Reply CRUD
    // ---------------------------------------------------------------------

    /**
     * Add a reply to an annotation.
     *
     * The parent id is reduced to lowercase hex characters before storage; it
     * is not checked against the existing replies.
     *
     * @param string $id       page id
     * @param string $annId    annotation id
     * @param string $author   username
     * @param string $body     reply text
     * @param string $parentId id of the reply being replied to, or '' for root-level
     * @return array|false the created reply, or false on invalid input, a
     *                     missing annotation, or write failure
     */
    public function addReply($id, $annId, $author, $body, $parentId = '')
    {
        if ($author === '' || $author === null) {
            return false;
        }
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        $now = time();
        $reply = [
            'id' => $this->newId(),
            'parentId' => preg_replace('/[^a-f0-9]/', '', (string) $parentId),
            'author' => $author,
            'created' => $now,
            'modified' => $now,
            'body' => $body,
        ];

        return $this->mutate($id, function (array &$annotations) use ($annId, $reply) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') === $annId) {
                    $annotations[$i]['replies'][] = $reply;
                    return $reply;
                }
            }
            return false;
        });
    }

    /**
     * Edit a reply's body text.
     *
     * @param string $id      page id
     * @param string $annId   annotation id
     * @param string $replyId reply id
     * @param string $body    new text
     * @return bool false when the body is empty, the reply is not found, or
     *              the write fails
     */
    public function updateReply($id, $annId, $replyId, $body)
    {
        $body = $this->cleanBody($body);
        if ($body === '') {
            return false;
        }
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId, $body) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') !== $annId) {
                    continue;
                }
                foreach (($a['replies'] ?? []) as $j => $r) {
                    if (($r['id'] ?? '') === $replyId) {
                        $annotations[$i]['replies'][$j]['body'] = $body;
                        $annotations[$i]['replies'][$j]['modified'] = time();
                        return true;
                    }
                }
            }
            return false;
        });
    }

    /**
     * Delete a reply.
     *
     * Only the addressed reply is removed; replies that name it as their
     * parent are left in place.
     *
     * @param string $id      page id
     * @param string $annId   annotation id
     * @param string $replyId reply id
     * @return bool false when the reply is not found or the write fails
     */
    public function deleteReply($id, $annId, $replyId)
    {
        return (bool) $this->mutate($id, function (array &$annotations) use ($annId, $replyId) {
            foreach ($annotations as $i => $a) {
                if (($a['id'] ?? '') !== $annId) {
                    continue;
                }
                foreach (($a['replies'] ?? []) as $j => $r) {
                    if (($r['id'] ?? '') === $replyId) {
                        array_splice($annotations[$i]['replies'], $j, 1);
                        return true;
                    }
                }
            }
            return false;
        });
    }

    // ---------------------------------------------------------------------
    // Bulk maintenance (admin, per page)
    // ---------------------------------------------------------------------

    /**
     * Remove every resolved annotation from a page.
     *
     * @param string $id page id
     * @return int|false number removed, or false on write failure
     */
    public function clearResolved($id)
    {
        // Nothing stored: skip taking the lock and writing a file.
        if (empty($this->getAnnotations($id))) {
            return 0;
        }
        return $this->mutate($id, function (array &$annotations) {
            $before = count($annotations);
            $annotations = array_values(array_filter($annotations, function ($a) {
                return ($a['status'] ?? 'open') !== 'resolved';
            }));
            return $before - count($annotations);
        });
    }

    /**
     * Remove every orphaned annotation from a page — those whose quoted text
     * no longer appears in the rendered page. The page is re-checked here, so
     * this is authoritative regardless of what a client believed.
     *
     * Orphans are removed by id. An orphaned entry that has no id (or an
     * empty one) cannot be addressed and is left in place, so it can never
     * cause other id-less entries to be deleted along with it.
     *
     * @param string $id page id
     * @return int|false number removed, or false on write failure
     */
    public function clearOrphaned($id)
    {
        $orphanIds = [];
        foreach ($this->findOrphaned($id) as $a) {
            if (isset($a['id']) && $a['id'] !== '') {
                $orphanIds[] = $a['id'];
            }
        }
        if (empty($orphanIds)) {
            return 0;
        }
        return $this->mutate($id, function (array &$annotations) use ($orphanIds) {
            $before = count($annotations);
            $annotations = array_values(array_filter($annotations, function ($a) use ($orphanIds) {
                return !in_array($a['id'] ?? '', $orphanIds, true);
            }));
            return $before - count($annotations);
        });
    }

    // ---------------------------------------------------------------------
    // Orphan detection
    // ---------------------------------------------------------------------

    /**
     * Render a page to normalised plain text, for quote searching.
     *
     * Block-level closing tags become spaces so adjacent blocks do not fuse
     * into one run of text; then tags are stripped, entities decoded, and
     * whitespace collapsed — the same normalisation applied to stored quotes.
     *
     * @param string $id page id
     * @return string the page text, or '' when the page does not exist or
     *                renders to nothing
     */
    public function getPageText($id)
    {
        if (!page_exists($id)) {
            return '';
        }
        $xhtml = p_wiki_xhtml($id, '', false);
        if (!is_string($xhtml) || $xhtml === '') {
            return '';
        }
        $xhtml = preg_replace('#</(p|div|li|h[1-6]|td|th|tr|blockquote|pre|dt|dd)>#i', ' ', $xhtml);
        $xhtml = preg_replace('#<br\s*/?>#i', ' ', $xhtml);
        $text = strip_tags($xhtml);
        $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        return $this->normalizeWhitespace($text);
    }

    /**
     * The annotations on a page whose quoted text is no longer present.
     *
     * An annotation with no quoted text at all is also reported as orphaned.
     *
     * @param string $id page id
     * @return array list of orphaned annotation arrays
     */
    public function findOrphaned($id)
    {
        $annotations = $this->getAnnotations($id);
        if (empty($annotations)) {
            return [];
        }
        $pageText = $this->getPageText($id);

        $orphaned = [];
        foreach ($annotations as $a) {
            $exact = $this->normalizeWhitespace($a['anchor']['exact'] ?? '');
            if ($exact === '' || mb_strpos($pageText, $exact) === false) {
                $orphaned[] = $a;
            }
        }
        return $orphaned;
    }

    // ---------------------------------------------------------------------
    // Permission rules (single source of truth)
    // ---------------------------------------------------------------------

    /**
     * May this user create an annotation, reply, or change a resolve status?
     *
     * Requires only read access to the page — annotations are out-of-band, so
     * a user whose page edit access is blocked may still annotate.
     *
     * @param string $user     current username ('' for anonymous)
     * @param int    $aclLevel the user's ACL level on the page
     * @return bool
     */
    public function canAnnotate($user, $aclLevel)
    {
        return $user !== '' && $user !== null && $aclLevel >= AUTH_READ;
    }

    /**
     * May this user edit or delete the given annotation? Author or admin.
     *
     * Anonymous users are always refused, even when flagged as admin.
     *
     * @param array  $annotation
     * @param string $user
     * @param bool   $isAdmin
     * @return bool
     */
    public function canEditAnnotation(array $annotation, $user, $isAdmin)
    {
        if ($user === '' || $user === null) {
            return false;
        }
        return $isAdmin || (($annotation['author'] ?? '') === $user);
    }

    /**
     * May this user edit or delete the given reply? Author or admin.
     *
     * Anonymous users are always refused, even when flagged as admin.
     *
     * @param array  $reply
     * @param string $user
     * @param bool   $isAdmin
     * @return bool
     */
    public function canEditReply(array $reply, $user, $isAdmin)
    {
        if ($user === '' || $user === null) {
            return false;
        }
        return $isAdmin || (($reply['author'] ?? '') === $user);
    }

    /**
     * May this user run the per-page "clear resolved/orphaned" operations?
     * Admins only.
     *
     * @param bool $isAdmin
     * @return bool
     */
    public function canClear($isAdmin)
    {
        return (bool) $isAdmin;
    }

    // ---------------------------------------------------------------------
    // Input cleaning
    // ---------------------------------------------------------------------

    /**
     * Validate and normalise a raw anchor.
     *
     * The quote is capped at MAX_QUOTE characters. The prefix keeps its last
     * MAX_CONTEXT characters and the suffix its first MAX_CONTEXT, i.e. the
     * parts nearest the quote. Non-string fields are treated as empty and a
     * negative start is clamped to 0.
     *
     * @param mixed $anchor
     * @return array|null the cleaned anchor, or null if unusable
     */
    protected function cleanAnchor($anchor)
    {
        if (!is_array($anchor)) {
            return null;
        }

        $exact = (isset($anchor['exact']) && is_string($anchor['exact']))
            ? $this->normalizeWhitespace($anchor['exact'])
            : '';
        if ($exact === '') {
            return null; // an anchor without quoted text is unusable
        }
        if (mb_strlen($exact) > self::MAX_QUOTE) {
            $exact = mb_substr($exact, 0, self::MAX_QUOTE);
        }

        $prefix = (isset($anchor['prefix']) && is_string($anchor['prefix']))
            ? $this->normalizeWhitespace($anchor['prefix'])
            : '';
        $suffix = (isset($anchor['suffix']) && is_string($anchor['suffix']))
            ? $this->normalizeWhitespace($anchor['suffix'])
            : '';
        if (mb_strlen($prefix) > self::MAX_CONTEXT) {
            $prefix = mb_substr($prefix, -self::MAX_CONTEXT);
        }
        if (mb_strlen($suffix) > self::MAX_CONTEXT) {
            $suffix = mb_substr($suffix, 0, self::MAX_CONTEXT);
        }

        $start = isset($anchor['start']) ? max(0, (int) $anchor['start']) : 0;

        return [
            'exact' => $exact,
            'prefix' => $prefix,
            'suffix' => $suffix,
            'start' => $start,
        ];
    }

    /**
     * Clean an annotation/reply body: a plain-text string, trimmed, with
     * normalised line endings and a length cap. Newlines are kept; the text
     * is escaped by the consumer at render time.
     *
     * @param mixed $body
     * @return string the cleaned body, or '' when the input is not a string
     */
    protected function cleanBody($body)
    {
        if (!is_string($body)) {
            return '';
        }
        $body = str_replace("\r\n", "\n", $body);
        $body = str_replace("\r", "\n", $body);
        $body = trim($body);
        if (mb_strlen($body) > self::MAX_BODY) {
            $body = mb_substr($body, 0, self::MAX_BODY);
        }
        return $body;
    }

    /**
     * Collapse every run of whitespace to a single space and trim.
     *
     * @param mixed $text
     * @return string the normalised text, or '' when the input cannot be
     *                processed as UTF-8
     */
    protected function normalizeWhitespace($text)
    {
        $collapsed = preg_replace('/\s+/u', ' ', (string) $text);
        if ($collapsed === null) {
            // With the /u modifier preg_replace() returns null on malformed
            // UTF-8. Handle it explicitly rather than passing null to trim().
            return '';
        }
        return trim($collapsed);
    }

    /**
     * A fresh identifier for an annotation or reply.
     *
     * @return string 16 hex characters
     */
    protected function newId()
    {
        return bin2hex(random_bytes(8));
    }
}