<?php

use dokuwiki\File\MediaResolver;
use dokuwiki\File\PageResolver;
use dokuwiki\Utf8\PhpString;

/**
 * The MetaData Renderer
 *
 * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
 * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
 * $persistent.
 *
 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
 *
 * @author Esther Brunner <wikidesign@gmail.com>
 */
class Doku_Renderer_metadata extends Doku_Renderer
{
    /** the approximate byte length to capture for the abstract */
    public const ABSTRACT_LEN = 250;

    /** the maximum UTF8 character length for the abstract */
    public const ABSTRACT_MAX = 500;

    /** @var array transient meta data, will be reset on each rendering */
    public $meta = [];

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = [];

    /** @var array the list of headers used to create unique link ids */
    protected $headers = [];

    /** @var string temporary $doc store, holds the abstract while a footnote is rendered */
    protected $store = '';

    /** @var string keeps the first image reference */
    protected $firstimage = '';

    /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
    public $capturing = true;

    /** @var bool determines if enough data for the abstract was collected, yet */
    public $capture = true;

    /** @var int number of bytes captured for abstract */
    protected $captured = 0;

    /**
     * Returns the format produced by this renderer.
     *
     * @return string always 'metadata'
     */
    public function getFormat()
    {
        return 'metadata';
    }

    /**
     * Initialize the document
     *
     * Sets up some of the persistent info about the page if it doesn't exist, yet.
     * Afterwards the transient metadata is reset to the persistent values.
     */
    public function document_start()
    {
        global $ID;

        $this->headers = [];

        // external pages are missing create date
        if (!isset($this->persistent['date']['created']) || !$this->persistent['date']['created']) {
            $this->persistent['date']['created'] = filectime(wikiFN($ID));
        }
        if (!isset($this->persistent['user'])) {
            $this->persistent['user'] = '';
        }
        if (!isset($this->persistent['creator'])) {
            $this->persistent['creator'] = '';
        }
        // reset metadata to persistent values
        $this->meta = $this->persistent;
    }

    /**
     * Finalize the document
     *
     * Stores collected data in the metadata: the renderer info, the abstract (unless one
     * is already set), the first image and the modification date (unless already set).
     * $doc is emptied afterwards.
     */
    public function document_end()
    {
        global $ID;

        // store internal info in metadata (notoc,nocache)
        $this->meta['internal'] = $this->info;

        if (!isset($this->meta['description']['abstract'])) {
            // cut off too long abstracts
            $this->doc = trim($this->doc);
            // ABSTRACT_MAX is a character limit and the cut below is character based, so the
            // check has to count characters as well. Counting bytes appended the ellipsis to
            // multibyte abstracts that were not shortened at all.
            if (PhpString::strlen($this->doc) > self::ABSTRACT_MAX) {
                $this->doc = PhpString::substr($this->doc, 0, self::ABSTRACT_MAX) . '…';
            }
            $this->meta['description']['abstract'] = $this->doc;
        }

        $this->meta['relation']['firstimage'] = $this->firstimage;

        if (!isset($this->meta['date']['modified'])) {
            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
        }

        $this->doc = '';
    }

    /**
     * Render plain text data
     *
     * This function takes care of the amount of captured data and will stop capturing when
     * enough abstract data is available
     *
     * @param string $text the text to add to the abstract
     */
    public function cdata($text)
    {
        if (!$this->capture || !$this->capturing) {
            return;
        }

        $this->doc .= $text;

        // the threshold is checked after appending, so the chunk that crosses it is kept completely
        $this->captured += strlen($text);
        if ($this->captured > self::ABSTRACT_LEN) {
            $this->capture = false;
        }
    }

    /**
     * Add an item to the TOC
     *
     * @param string $id the hash link
     * @param string $text the text to display
     * @param int $level the nesting level
     */
    public function toc_additem($id, $text, $level)
    {
        global $conf;

        //only add items within configured levels
        if ($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
            // the TOC is one of our standard ul list arrays ;-)
            $this->meta['description']['tableofcontents'][] = [
                'hid' => $id,
                'title' => $text,
                'type' => 'ul',
                'level' => $level - $conf['toptoclevel'] + 1
            ];
        }
    }

    /**
     * Render a heading
     *
     * The first heading rendered becomes the page title unless a title is already set.
     *
     * @param string $text the text to display
     * @param int $level header level
     * @param int $pos byte position in the original source
     */
    public function header($text, $level, $pos)
    {
        if (!isset($this->meta['title'])) {
            $this->meta['title'] = $text;
        }

        // add the header to the TOC
        $hid = $this->_headerToLink($text, true);
        $this->toc_additem($hid, $text, $level);

        // add to summary
        $this->cdata(DOKU_LF . $text . DOKU_LF);
    }

    /**
     * Open a paragraph
     */
    public function p_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Close a paragraph
     */
    public function p_close()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a line break
     */
    public function linebreak()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a horizontal line
     */
    public function hr()
    {
        $this->cdata(DOKU_LF . '----------' . DOKU_LF);
    }

    /**
     * Callback for footnote start syntax
     *
     * All following content will go to the footnote instead of
     * the document. To achieve this the previous rendered content
     * is moved to $store and $doc is cleared
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function footnote_open()
    {
        if ($this->capture) {
            // move current content to store
            // this is required to ensure safe behaviour of plugins accessed within footnotes
            $this->store = $this->doc;
            $this->doc = '';

            // disable capturing
            $this->capturing = false;
        }
    }

    /**
     * Callback for footnote end syntax
     *
     * All content rendered whilst within footnote syntax mode is discarded,
     * the previously rendered content is restored and capturing is re-enabled.
     *
     * @author Andreas Gohr
     */
    public function footnote_close()
    {
        if ($this->capture) {
            // re-enable capturing
            $this->capturing = true;
            // restore previously rendered content
            $this->doc = $this->store;
            $this->store = '';
        }
    }

    /**
     * Open an unordered list
     */
    public function listu_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open an ordered list
     *
     * @param string|string[]|null $classes Optional CSS classes (ignored by metadata)
     * @param int $start Starting number (ignored by metadata)
     */
    public function listo_open($classes = null, $start = 1)
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open a list item
     *
     * @param int $level the nesting level
     * @param bool $node true when a node; false when a leaf
     */
    public function listitem_open($level, $node = false)
    {
        $this->cdata(str_repeat(DOKU_TAB, $level) . '* ');
    }

    /**
     * Close a list item
     */
    public function listitem_close()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Output preformatted text
     *
     * @param string $text
     */
    public function preformatted($text)
    {
        $this->cdata($text);
    }

    /**
     * Start a block quote
     */
    public function quote_open()
    {
        $this->cdata(DOKU_LF . DOKU_TAB . '"');
    }

    /**
     * Stop a block quote
     */
    public function quote_close()
    {
        $this->cdata('"' . DOKU_LF);
    }

    /**
     * Display text as file content
     *
     * Only the text is added to the abstract, language and file label are ignored.
     *
     * @param string $text text to show
     * @param string $lang programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function file($text, $lang = null, $file = null)
    {
        $this->cdata(DOKU_LF . $text . DOKU_LF);
    }

    /**
     * Display text as code content
     *
     * Only the text is added to the abstract, language and file label are ignored.
     *
     * @param string $text text to show
     * @param string $language programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function code($text, $language = null, $file = null)
    {
        $this->cdata(DOKU_LF . $text . DOKU_LF);
    }

    /**
     * Format an acronym
     *
     * The acronym is added to the abstract as is.
     *
     * @param string $acronym
     */
    public function acronym($acronym)
    {
        $this->cdata($acronym);
    }

    /**
     * Format a smiley
     *
     * The smiley is added to the abstract as is.
     *
     * @param string $smiley
     */
    public function smiley($smiley)
    {
        $this->cdata($smiley);
    }

    /**
     * Format an entity
     *
     * Entities are basically small text replacements. The entity is added
     * to the abstract as is.
     *
     * @param string $entity
     */
    public function entity($entity)
    {
        $this->cdata($entity);
    }

    /**
     * Typographically format a multiply sign
     *
     * Example: ($x=640, $y=480) should result in "640×480"
     *
     * @param string|int $x first value
     * @param string|int $y second value
     */
    public function multiplyentity($x, $y)
    {
        $this->cdata($x . '×' . $y);
    }

    /**
     * Render an opening single quote char (language specific)
     */
    public function singlequoteopening()
    {
        global $lang;
        $this->cdata($lang['singlequoteopening']);
    }

    /**
     * Render a closing single quote char (language specific)
     */
    public function singlequoteclosing()
    {
        global $lang;
        $this->cdata($lang['singlequoteclosing']);
    }

    /**
     * Render an apostrophe char (language specific)
     */
    public function apostrophe()
    {
        global $lang;
        $this->cdata($lang['apostrophe']);
    }

    /**
     * Render an opening double quote char (language specific)
     */
    public function doublequoteopening()
    {
        global $lang;
        $this->cdata($lang['doublequoteopening']);
    }

    /**
     * Render a closing double quote char (language specific)
     */
    public function doublequoteclosing()
    {
        global $lang;
        $this->cdata($lang['doublequoteclosing']);
    }

    /**
     * Render a CamelCase link
     *
     * @param string $link The link name
     * @see http://en.wikipedia.org/wiki/CamelCase
     */
    public function camelcaselink($link)
    {
        $this->internallink($link, $link);
    }

    /**
     * Render a page local link
     *
     * Nothing is added to the abstract, only media used as link name is tracked.
     *
     * @param string $hash hash link identifier
     * @param string|array|null $name name for the link, array for media file
     */
    public function locallink($hash, $name = null)
    {
        $this->_recordLinkMedia($name);
    }

    /**
     * keep track of internal links in $this->meta['relation']['references']
     *
     * @param string $id page ID to link to. eg. 'wiki:syntax'
     * @param string|array|null $name name for the link, array for media file
     */
    public function internallink($id, $name = null)
    {
        global $ID;

        $this->_recordLinkMedia($name);

        // drop a query string, if there is any
        [$id] = explode('?', $id, 2);

        // the fallback title is derived from the ID as written, before it gets resolved
        $default = $this->_simpleTitle($id);

        // first resolve and clean up the $id
        $resolver = new PageResolver($ID);
        $id = $resolver->resolveId($id);
        [$page] = sexplode('#', $id, 2);

        // set metadata
        $this->meta['relation']['references'][$page] = page_exists($page);

        // add link title to summary
        if ($this->capture) {
            $name = $this->_getLinkTitle($name, $default, $id);
            $this->doc .= $name;
        }
    }

    /**
     * Render an external link
     *
     * @param string $url full URL with scheme
     * @param string|array|null $name name for the link, array for media file
     */
    public function externallink($url, $name = null)
    {
        $this->_recordLinkMedia($name);

        if ($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
        }
    }

    /**
     * Render an interwiki link
     *
     * You may want to use $this->_resolveInterWiki() here
     *
     * @param string $match original link - probably not much use
     * @param string|array $name name for the link, array for media file
     * @param string $wikiName identifier (shortcut) for the remote wiki
     * @param string $wikiUri the fragment parsed from the original link
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri)
    {
        $this->_recordLinkMedia($name);

        if ($this->capture) {
            // the anchor is not part of the fallback title
            [$wikiUri] = explode('#', $wikiUri, 2);
            $name = $this->_getLinkTitle($name, $wikiUri);
            $this->doc .= $name;
        }
    }

    /**
     * Link to windows share
     *
     * @param string $url the link
     * @param string|array|null $name name for the link, array for media file
     */
    public function windowssharelink($url, $name = null)
    {
        $this->_recordLinkMedia($name);

        if ($this->capture) {
            // _getLinkTitle() copes with media arrays, appending $name directly
            // would put the string "Array" into the abstract
            $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
        }
    }

    /**
     * Render a linked E-Mail Address
     *
     * Should honor $conf['mailguard'] setting
     *
     * @param string $address Email-Address
     * @param string|array|null $name name for the link, array for media file
     */
    public function emaillink($address, $name = null)
    {
        $this->_recordLinkMedia($name);

        if ($this->capture) {
            // _getLinkTitle() copes with media arrays, appending $name directly
            // would put the string "Array" into the abstract
            $this->doc .= $this->_getLinkTitle($name, '<' . $address . '>');
        }
    }

    /**
     * Render an internal media file
     *
     * @param string $src media ID
     * @param string $title descriptive text
     * @param string $align left|center|right
     * @param int $width width of media in pixel
     * @param int $height height of media in pixel
     * @param string $cache cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function internalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null
    ) {
        if ($this->capture && $title) {
            $this->doc .= '[' . $title . ']';
        }
        $this->_firstimage($src);
        $this->_recordMediaUsage($src);
    }

    /**
     * Render an external media file
     *
     * @param string $src full media URL
     * @param string $title descriptive text
     * @param string $align left|center|right
     * @param int $width width of media in pixel
     * @param int $height height of media in pixel
     * @param string $cache cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function externalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null
    ) {
        if ($this->capture && $title) {
            $this->doc .= '[' . $title . ']';
        }
        $this->_firstimage($src);
    }

    /**
     * Render the output of an RSS feed
     *
     * Registers the feed in $this->meta['relation']['haspart'] and keeps the smallest
     * refresh value of all feeds in $this->meta['date']['valid']['age'].
     *
     * @param string $url URL of the feed
     * @param array $params Finetuning of the output
     */
    public function rss($url, $params)
    {
        $this->meta['relation']['haspart'][$url] = true;

        $this->meta['date']['valid']['age'] =
            isset($this->meta['date']['valid']['age']) ?
                min($this->meta['date']['valid']['age'], $params['refresh']) :
                $params['refresh'];
    }

    #region Utils

    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * @param string $name
     *
     * @return mixed|string empty string when an array was given
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     */
    public function _simpleTitle($name)
    {
        global $conf;

        if (is_array($name)) {
            return '';
        }

        if ($conf['useslash']) {
            $nssep = '[:;/]';
        } else {
            $nssep = '[:;]';
        }
        $name = preg_replace('!.*' . $nssep . '!', '', $name);
        //if there is a hash we use the anchor name only
        $name = preg_replace('!.*#!', '', $name);
        return $name;
    }

    /**
     * Construct a title and handle images in titles
     *
     * @param string|array|null $title either string title or media array
     * @param string $default default title if nothing else is found
     * @param null|string $id linked page id (used to extract title from first heading)
     * @return string title text
     * @author Harry Fuecks <hfuecks@gmail.com>
     */
    public function _getLinkTitle($title, $default, $id = null)
    {
        if (is_array($title)) {
            if ($title['title']) {
                return '[' . $title['title'] . ']';
            } else {
                return $default;
            }
        } elseif (is_null($title) || trim($title) == '') {
            if (useHeading('content') && $id) {
                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
                if ($heading) {
                    return $heading;
                }
            }
            return $default;
        } else {
            return $title;
        }
    }

    /**
     * Track a media file that is used as the name of a link
     *
     * Names that are not media arrays are ignored. The media is offered as first image,
     * internal media is additionally recorded as used media.
     *
     * @param string|array|null $name name for the link, array for media file
     */
    protected function _recordLinkMedia($name)
    {
        if (!is_array($name)) {
            return;
        }

        $this->_firstimage($name['src']);
        if ($name['type'] === 'internalmedia') {
            $this->_recordMediaUsage($name['src']);
        }
    }

    /**
     * Remember first image
     *
     * Only the first source with one of the accepted image extensions is kept,
     * later calls are ignored.
     *
     * @param string $src image URL or ID
     */
    protected function _firstimage($src)
    {
        global $ID;

        if ($this->firstimage) {
            return;
        }

        [$src] = explode('#', $src, 2);
        if (!media_isexternal($src)) {
            $src = (new MediaResolver($ID))->resolveId($src);
        }
        // the dot has to be escaped, otherwise any name merely ending in
        // e.g. "png" would be accepted as an image
        if (preg_match('/\.(jpe?g|gif|png|webp|svg)$/i', $src)) {
            $this->firstimage = $src;
        }
    }

    /**
     * Store list of used media files in metadata
     *
     * External media is skipped. The stored value tells whether the media file exists.
     *
     * @param string $src media ID
     */
    protected function _recordMediaUsage($src)
    {
        global $ID;

        [$src] = explode('#', $src, 2);
        if (media_isexternal($src)) {
            return;
        }
        $src = (new MediaResolver($ID))->resolveId($src);
        $file = mediaFN($src);
        $this->meta['relation']['media'][$src] = file_exists($file);
    }

    #endregion
}

//Setup VIM: ex: et ts=4 :