<?php

use dokuwiki\File\MediaResolver;
use dokuwiki\File\PageResolver;
use dokuwiki\Utf8\PhpString;

/**
 * The MetaData Renderer
 *
 * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
 * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
 * $persistent.
 *
 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
 *
 * @author Esther Brunner <wikidesign@gmail.com>
 */
class Doku_Renderer_metadata extends Doku_Renderer
{
    /** the approximate byte length to capture for the abstract */
    public const ABSTRACT_LEN = 250;

    /** the maximum UTF8 character length for the abstract */
    public const ABSTRACT_MAX = 500;

    /** @var array transient meta data, will be reset on each rendering */
    public $meta = [];

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = [];

    /** @var array the list of headers used to create unique link ids */
    protected $headers = [];

    /** @var string temporary $doc store, holds the abstract while a footnote is being rendered */
    protected $store = '';

    /** @var string keeps the first image reference */
    protected $firstimage = '';

    /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
    public $capturing = true;

    /** @var bool true as long as more data for the abstract is still wanted; set to false once enough was collected */
    public $capture = true;

    /** @var int number of bytes captured for abstract (only counts text passed through cdata()) */
    protected $captured = 0;

    /**
     * Returns the format produced by this renderer.
     *
     * @return string always 'metadata'
     */
    public function getFormat()
    {
        return 'metadata';
    }

    /**
     * Initialize the document
     *
     * Sets up some of the persistent info about the page if it doesn't exist, yet.
     */
    public function document_start()
    {
        global $ID;

        $this->headers = [];

        // external pages are missing create date
        if (empty($this->persistent['date']['created'])) {
            $this->persistent['date']['created'] = filectime(wikiFN($ID));
        }
        if (!isset($this->persistent['user'])) {
            $this->persistent['user'] = '';
        }
        if (!isset($this->persistent['creator'])) {
            $this->persistent['creator'] = '';
        }

        // reset metadata to persistent values
        $this->meta = $this->persistent;
    }

    /**
     * Finalize the document
     *
     * Stores collected data in the metadata
     */
    public function document_end()
    {
        global $ID;

        // store internal info in metadata (notoc,nocache)
        $this->meta['internal'] = $this->info;

        if (!isset($this->meta['description']['abstract'])) {
            // cut off too long abstracts
            $this->doc = trim($this->doc);
            // ABSTRACT_MAX is a character limit, so the length has to be measured in UTF-8
            // characters, too. A byte based check would append the ellipsis to multibyte
            // abstracts that are not actually shortened by the substr() call.
            if (PhpString::strlen($this->doc) > self::ABSTRACT_MAX) {
                $this->doc = PhpString::substr($this->doc, 0, self::ABSTRACT_MAX) . '…';
            }
            $this->meta['description']['abstract'] = $this->doc;
        }

        $this->meta['relation']['firstimage'] = $this->firstimage;

        if (!isset($this->meta['date']['modified'])) {
            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
        }

        $this->doc = '';
    }

    /**
     * Render plain text data
     *
     * This function takes care of the amount captured data and will stop capturing when
     * enough abstract data is available
     *
     * @param string $text
     */
    public function cdata($text)
    {
        if (!$this->capture || !$this->capturing) {
            return;
        }

        $this->doc .= $text;

        // the counter is byte based on purpose, ABSTRACT_LEN is only an approximate threshold
        $this->captured += strlen($text);
        if ($this->captured > self::ABSTRACT_LEN) {
            $this->capture = false;
        }
    }

    /**
     * Add an item to the TOC
     *
     * @param string $id the hash link
     * @param string $text the text to display
     * @param int $level the nesting level
     */
    public function toc_additem($id, $text, $level)
    {
        global $conf;

        //only add items within configured levels
        if ($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
            // the TOC is one of our standard ul list arrays ;-)
            $this->meta['description']['tableofcontents'][] = [
                'hid' => $id,
                'title' => $text,
                'type' => 'ul',
                'level' => $level - $conf['toptoclevel'] + 1
            ];
        }
    }

    /**
     * Render a heading
     *
     * The first heading of the document becomes the page title.
     *
     * @param string $text the text to display
     * @param int $level header level
     * @param int $pos byte position in the original source
     */
    public function header($text, $level, $pos)
    {
        if (!isset($this->meta['title'])) {
            $this->meta['title'] = $text;
        }

        // add the header to the TOC
        $hid = $this->_headerToLink($text, true);
        $this->toc_additem($hid, $text, $level);

        // add to summary
        $this->cdata(DOKU_LF . $text . DOKU_LF);
    }

    /**
     * Open a paragraph
     */
    public function p_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Close a paragraph
     */
    public function p_close()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a line break
     */
    public function linebreak()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a horizontal line
     */
    public function hr()
    {
        $this->cdata(DOKU_LF . '----------' . DOKU_LF);
    }

    /**
     * Callback for footnote start syntax
     *
     * All following content will go to the footnote instead of
     * the document. To achieve this the previous rendered content
     * is moved to $store and $doc is cleared
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function footnote_open()
    {
        if ($this->capture) {
            // move current content to store
            // this is required to ensure safe behaviour of plugins accessed within footnotes
            $this->store = $this->doc;
            $this->doc = '';

            // disable capturing
            $this->capturing = false;
        }
    }

    /**
     * Callback for footnote end syntax
     *
     * All content rendered whilst within footnote syntax mode is discarded,
     * the previously rendered content is restored and capturing is re-enabled.
     *
     * @author Andreas Gohr
     */
    public function footnote_close()
    {
        if ($this->capture) {
            // re-enable capturing
            $this->capturing = true;
            // restore previously rendered content
            $this->doc = $this->store;
            $this->store = '';
        }
    }

    /**
     * Open an unordered list
     */
    public function listu_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open an ordered list
     */
    public function listo_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open a list item
     *
     * @param int $level the nesting level
     * @param bool $node true when a node; false when a leaf
     */
    public function listitem_open($level, $node = false)
    {
        $this->cdata(str_repeat(DOKU_TAB, $level) . '* ');
    }

    /**
     * Close a list item
     */
    public function listitem_close()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Output preformatted text
     *
     * @param string $text
     */
    public function preformatted($text)
    {
        $this->cdata($text);
    }

    /**
     * Start a block quote
     */
    public function quote_open()
    {
        $this->cdata(DOKU_LF . DOKU_TAB . '"');
    }

    /**
     * Stop a block quote
     */
    public function quote_close()
    {
        $this->cdata('"' . DOKU_LF);
    }

    /**
     * Display text as file content, optionally syntax highlighted
     *
     * @param string $text text to show
     * @param string $lang programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function file($text, $lang = null, $file = null)
    {
        $this->cdata(DOKU_LF . $text . DOKU_LF);
    }

    /**
     * Display text as code content, optionally syntax highlighted
     *
     * @param string $text text to show
     * @param string $language programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function code($text, $language = null, $file = null)
    {
        $this->cdata(DOKU_LF . $text . DOKU_LF);
    }

    /**
     * Format an acronym
     *
     * Uses $this->acronyms
     *
     * @param string $acronym
     */
    public function acronym($acronym)
    {
        $this->cdata($acronym);
    }

    /**
     * Format a smiley
     *
     * Uses $this->smiley
     *
     * @param string $smiley
     */
    public function smiley($smiley)
    {
        $this->cdata($smiley);
    }

    /**
     * Format an entity
     *
     * Entities are basically small text replacements
     *
     * Uses $this->entities
     *
     * @param string $entity
     */
    public function entity($entity)
    {
        $this->cdata($entity);
    }

    /**
     * Typographically format a multiply sign
     *
     * Example: ($x=640, $y=480) should result in "640×480"
     *
     * @param string|int $x first value
     * @param string|int $y second value
     */
    public function multiplyentity($x, $y)
    {
        $this->cdata($x . '×' . $y);
    }

    /**
     * Render an opening single quote char (language specific)
     */
    public function singlequoteopening()
    {
        global $lang;
        $this->cdata($lang['singlequoteopening']);
    }

    /**
     * Render a closing single quote char (language specific)
     */
    public function singlequoteclosing()
    {
        global $lang;
        $this->cdata($lang['singlequoteclosing']);
    }

    /**
     * Render an apostrophe char (language specific)
     */
    public function apostrophe()
    {
        global $lang;
        $this->cdata($lang['apostrophe']);
    }

    /**
     * Render an opening double quote char (language specific)
     */
    public function doublequoteopening()
    {
        global $lang;
        $this->cdata($lang['doublequoteopening']);
    }

    /**
     * Render a closing double quote char (language specific)
     */
    public function doublequoteclosing()
    {
        global $lang;
        $this->cdata($lang['doublequoteclosing']);
    }

    /**
     * Render a CamelCase link
     *
     * @param string $link The link name
     * @see http://en.wikipedia.org/wiki/CamelCase
     */
    public function camelcaselink($link)
    {
        $this->internallink($link, $link);
    }

    /**
     * Render a page local link
     *
     * Nothing is added to the abstract, only media used as link name is recorded.
     *
     * @param string $hash hash link identifier
     * @param string|array|null $name name for the link, array for media file
     */
    public function locallink($hash, $name = null)
    {
        $this->_recordLinkMedia($name);
    }

    /**
     * keep track of internal links in $this->meta['relation']['references']
     *
     * @param string $id page ID to link to. eg. 'wiki:syntax'
     * @param string|array|null $name name for the link, array for media file
     */
    public function internallink($id, $name = null)
    {
        global $ID;

        $this->_recordLinkMedia($name);

        // strip any query string from the link target
        $parts = explode('?', $id, 2);
        if (count($parts) === 2) {
            $id = $parts[0];
        }

        $default = $this->_simpleTitle($id);

        // first resolve and clean up the $id
        $resolver = new PageResolver($ID);
        $id = $resolver->resolveId($id);
        [$page] = sexplode('#', $id, 2);

        // set metadata
        $this->meta['relation']['references'][$page] = page_exists($page);

        // add link title to summary
        if ($this->capture) {
            $name = $this->_getLinkTitle($name, $default, $id);
            $this->doc .= $name;
        }
    }

    /**
     * Render an external link
     *
     * @param string $url full URL with scheme
     * @param string|array|null $name name for the link, array for media file
     */
    public function externallink($url, $name = null)
    {
        $this->_recordLinkMedia($name);

        if ($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
        }
    }

    /**
     * Render an interwiki link
     *
     * You may want to use $this->_resolveInterWiki() here
     *
     * @param string $match original link - probably not much use
     * @param string|array $name name for the link, array for media file
     * @param string $wikiName identifier (shortcut) for the remote wiki
     * @param string $wikiUri the fragment parsed from the original link
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri)
    {
        $this->_recordLinkMedia($name);

        if ($this->capture) {
            [$wikiUri] = explode('#', $wikiUri, 2);
            $name = $this->_getLinkTitle($name, $wikiUri);
            $this->doc .= $name;
        }
    }

    /**
     * Link to windows share
     *
     * @param string $url the link
     * @param string|array|null $name name for the link, array for media file
     */
    public function windowssharelink($url, $name = null)
    {
        $this->_recordLinkMedia($name);

        if ($this->capture) {
            // go through _getLinkTitle() so a media array used as link name is turned into
            // its title instead of being concatenated as the literal string "Array"
            $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
        }
    }

    /**
     * Render a linked E-Mail Address
     *
     * Should honor $conf['mailguard'] setting
     *
     * @param string $address Email-Address
     * @param string|array|null $name name for the link, array for media file
     */
    public function emaillink($address, $name = null)
    {
        $this->_recordLinkMedia($name);

        if ($this->capture) {
            // go through _getLinkTitle() so a media array used as link name is turned into
            // its title instead of being concatenated as the literal string "Array"
            $this->doc .= $this->_getLinkTitle($name, '<' . $address . '>');
        }
    }

    /**
     * Render an internal media file
     *
     * @param string $src media ID
     * @param string $title descriptive text
     * @param string $align left|center|right
     * @param int $width width of media in pixel
     * @param int $height height of media in pixel
     * @param string $cache cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function internalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null
    ) {
        if ($this->capture && $title) {
            $this->doc .= '[' . $title . ']';
        }
        $this->_firstimage($src);
        $this->_recordMediaUsage($src);
    }

    /**
     * Render an external media file
     *
     * @param string $src full media URL
     * @param string $title descriptive text
     * @param string $align left|center|right
     * @param int $width width of media in pixel
     * @param int $height height of media in pixel
     * @param string $cache cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function externalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null
    ) {
        if ($this->capture && $title) {
            $this->doc .= '[' . $title . ']';
        }
        $this->_firstimage($src);
    }

    /**
     * Render the output of an RSS feed
     *
     * Records the feed as part of the page and lowers the page's validity age
     * to the smallest refresh value of all embedded feeds.
     *
     * @param string $url URL of the feed
     * @param array $params Finetuning of the output
     */
    public function rss($url, $params)
    {
        $this->meta['relation']['haspart'][$url] = true;

        $this->meta['date']['valid']['age'] =
            isset($this->meta['date']['valid']['age']) ?
                min($this->meta['date']['valid']['age'], $params['refresh']) :
                $params['refresh'];
    }

    #region Utils

    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * @param string $name
     *
     * @return mixed|string
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     */
    public function _simpleTitle($name)
    {
        global $conf;

        if (is_array($name)) {
            return '';
        }

        if ($conf['useslash']) {
            $nssep = '[:;/]';
        } else {
            $nssep = '[:;]';
        }
        $name = preg_replace('!.*' . $nssep . '!', '', $name);
        //if there is a hash we use the anchor name only
        $name = preg_replace('!.*#!', '', $name);
        return $name;
    }

    /**
     * Construct a title and handle images in titles
     *
     * @param string|array|null $title either string title or media array
     * @param string $default default title if nothing else is found
     * @param null|string $id linked page id (used to extract title from first heading)
     * @return string title text
     * @author Harry Fuecks <hfuecks@gmail.com>
     */
    public function _getLinkTitle($title, $default, $id = null)
    {
        if (is_array($title)) {
            // media used as link name: use its title in brackets if there is one
            if ($title['title']) {
                return '[' . $title['title'] . ']';
            }
            return $default;
        }

        if (is_null($title) || trim($title) === '') {
            if (useHeading('content') && $id) {
                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
                if ($heading) {
                    return $heading;
                }
            }
            return $default;
        }

        return $title;
    }

    /**
     * Record media that is used as the name of a link
     *
     * Does nothing unless the link name is a media array. The media is considered
     * as first image and, when it is internal media, added to the list of used media.
     *
     * @param string|array|null $name name for the link, array for media file
     */
    protected function _recordLinkMedia($name)
    {
        if (!is_array($name)) {
            return;
        }

        $this->_firstimage($name['src']);
        if ($name['type'] === 'internalmedia') {
            $this->_recordMediaUsage($name['src']);
        }
    }

    /**
     * Remember first image
     *
     * Only the first source with a known image file extension is kept.
     *
     * @param string $src image URL or ID
     */
    protected function _firstimage($src)
    {
        global $ID;

        if ($this->firstimage) {
            return;
        }

        [$src] = explode('#', $src, 2);
        if (!media_isexternal($src)) {
            $src = (new MediaResolver($ID))->resolveId($src);
        }
        // the dot has to be escaped, otherwise any name merely ending in e.g. "png" would match
        if (preg_match('/\.(jpe?g|gif|png|webp|svg)$/i', $src)) {
            $this->firstimage = $src;
        }
    }

    /**
     * Store list of used media files in metadata
     *
     * External media is ignored. For internal media the resolved ID is stored
     * together with the information whether the file exists.
     *
     * @param string $src media ID
     */
    protected function _recordMediaUsage($src)
    {
        global $ID;

        [$src] = explode('#', $src, 2);
        if (media_isexternal($src)) {
            return;
        }
        $src = (new MediaResolver($ID))->resolveId($src);
        $file = mediaFN($src);
        $this->meta['relation']['media'][$src] = file_exists($file);
    }

    #endregion
}

//Setup VIM: ex: et ts=4 :