1 <?php
2 
3 use dokuwiki\File\MediaResolver;
4 use dokuwiki\File\PageResolver;
5 use dokuwiki\Utf8\PhpString;
6 
7 /**
8  * The MetaData Renderer
9  *
10  * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
11  * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
12  * $persistent.
13  *
14  * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
15  *
16  * @author Esther Brunner <wikidesign@gmail.com>
17  */
class Doku_Renderer_metadata extends Doku_Renderer
{
    /** the approximate byte length to capture for the abstract */
    public const ABSTRACT_LEN = 250;

    /** the maximum UTF8 character length for the abstract */
    public const ABSTRACT_MAX = 500;

    /** @var array transient meta data, will be reset on each rendering */
    public $meta = [];

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = [];

    /** @var array the list of headers used to create unique link ids */
    protected $headers = [];

    /** @var string temporary $doc store, holds the abstract text while a footnote is rendered */
    protected $store = '';

    /** @var string keeps the first image reference */
    protected $firstimage = '';

    /**
     * @var bool whether or not data is being captured for the abstract, public to be accessible by plugins
     *           (switched off while inside a footnote)
     */
    public $capturing = true;

    /** @var bool true as long as more data for the abstract is wanted, false once enough was collected */
    public $capture = true;

    /** @var int number of bytes captured for abstract */
    protected $captured = 0;

    /**
     * Returns the format produced by this renderer.
     *
     * @return string always 'metadata'
     */
    public function getFormat()
    {
        return 'metadata';
    }

    /**
     * Initialize the document
     *
     * Sets up some of the persistent info about the page if it doesn't exist, yet,
     * then seeds the transient metadata from the persistent values.
     */
    public function document_start()
    {
        global $ID;

        $this->headers = [];

        // external pages are missing create date
        if (empty($this->persistent['date']['created'])) {
            $this->persistent['date']['created'] = filectime(wikiFN($ID));
        }
        if (!isset($this->persistent['user'])) {
            $this->persistent['user'] = '';
        }
        if (!isset($this->persistent['creator'])) {
            $this->persistent['creator'] = '';
        }

        // reset metadata to persistent values
        $this->meta = $this->persistent;
    }

    /**
     * Finalize the document
     *
     * Stores collected data (render info, abstract, first image, modification date)
     * in the metadata and clears $doc.
     */
    public function document_end()
    {
        global $ID;

        // store internal info in metadata (notoc,nocache)
        $this->meta['internal'] = $this->info;

        if (!isset($this->meta['description']['abstract'])) {
            $abstract = trim($this->doc);
            // cut off too long abstracts; ABSTRACT_MAX is a character count, so the
            // length has to be measured in UTF-8 characters as well, otherwise multibyte
            // text below the limit would get an ellipsis without having been shortened
            if (PhpString::strlen($abstract) > self::ABSTRACT_MAX) {
                $abstract = PhpString::substr($abstract, 0, self::ABSTRACT_MAX) . '…';
            }
            $this->doc = $abstract;
            $this->meta['description']['abstract'] = $abstract;
        }

        $this->meta['relation']['firstimage'] = $this->firstimage;

        if (!isset($this->meta['date']['modified'])) {
            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
        }

        $this->doc = '';
    }

    /**
     * Render plain text data
     *
     * This function takes care of the amount captured data and will stop capturing when
     * enough abstract data is available
     *
     * @param string $text the text to add to the abstract
     */
    public function cdata($text)
    {
        if (!$this->capture || !$this->capturing) {
            return;
        }

        $this->doc .= $text;

        // the threshold is checked after appending, so the abstract may exceed ABSTRACT_LEN
        $this->captured += strlen($text);
        if ($this->captured > self::ABSTRACT_LEN) {
            $this->capture = false;
        }
    }

    /**
     * Add an item to the TOC
     *
     * @param string $id the hash link
     * @param string $text the text to display
     * @param int $level the nesting level
     */
    public function toc_additem($id, $text, $level)
    {
        global $conf;

        //only add items within configured levels
        if ($level < $conf['toptoclevel'] || $level > $conf['maxtoclevel']) {
            return;
        }

        // the TOC is one of our standard ul list arrays ;-)
        $this->meta['description']['tableofcontents'][] = [
            'hid' => $id,
            'title' => $text,
            'type' => 'ul',
            'level' => $level - $conf['toptoclevel'] + 1
        ];
    }

    /**
     * Render a heading
     *
     * The first heading becomes the page title unless a title is already set.
     *
     * @param string $text the text to display
     * @param int $level header level
     * @param int $pos byte position in the original source
     */
    public function header($text, $level, $pos)
    {
        if (!isset($this->meta['title'])) {
            $this->meta['title'] = $text;
        }

        // add the header to the TOC
        $hid = $this->_headerToLink($text, true);
        $this->toc_additem($hid, $text, $level);

        // add to summary
        $this->cdata(DOKU_LF . $text . DOKU_LF);
    }

    /**
     * Open a paragraph
     */
    public function p_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Close a paragraph
     */
    public function p_close()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a line break
     */
    public function linebreak()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a horizontal line
     */
    public function hr()
    {
        $this->cdata(DOKU_LF . '----------' . DOKU_LF);
    }

    /**
     * Callback for footnote start syntax
     *
     * All following content will go to the footnote instead of
     * the document. To achieve this the previous rendered content
     * is moved to $store and $doc is cleared
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function footnote_open()
    {
        if (!$this->capture) {
            return;
        }

        // move current content to store
        // this is required to ensure safe behaviour of plugins accessed within footnotes
        $this->store = $this->doc;
        $this->doc = '';

        // disable capturing
        $this->capturing = false;
    }

    /**
     * Callback for footnote end syntax
     *
     * All content rendered whilst within footnote syntax mode is discarded,
     * the previously rendered content is restored and capturing is re-enabled.
     *
     * @author Andreas Gohr
     */
    public function footnote_close()
    {
        if (!$this->capture) {
            return;
        }

        // re-enable capturing
        $this->capturing = true;

        // restore previously rendered content
        $this->doc = $this->store;
        $this->store = '';
    }

    /**
     * Open an unordered list
     */
    public function listu_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open an ordered list
     */
    public function listo_open()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open a list item
     *
     * @param int $level the nesting level
     * @param bool $node true when a node; false when a leaf
     */
    public function listitem_open($level, $node = false)
    {
        $this->cdata(str_repeat(DOKU_TAB, $level) . '* ');
    }

    /**
     * Close a list item
     */
    public function listitem_close()
    {
        $this->cdata(DOKU_LF);
    }

    /**
     * Output preformatted text
     *
     * @param string $text
     */
    public function preformatted($text)
    {
        $this->cdata($text);
    }

    /**
     * Start a block quote
     */
    public function quote_open()
    {
        $this->cdata(DOKU_LF . DOKU_TAB . '"');
    }

    /**
     * Stop a block quote
     */
    public function quote_close()
    {
        $this->cdata('"' . DOKU_LF);
    }

    /**
     * Display text as file content, optionally syntax highlighted
     *
     * Only the plain text is added to the abstract; language and file label are ignored.
     *
     * @param string $text text to show
     * @param string $lang programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function file($text, $lang = null, $file = null)
    {
        $this->cdata(DOKU_LF . $text . DOKU_LF);
    }

    /**
     * Display text as code content, optionally syntax highlighted
     *
     * Only the plain text is added to the abstract; language and file label are ignored.
     *
     * @param string $text text to show
     * @param string $language programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function code($text, $language = null, $file = null)
    {
        $this->cdata(DOKU_LF . $text . DOKU_LF);
    }

    /**
     * Format an acronym
     *
     * The acronym is added to the abstract as plain text.
     *
     * @param string $acronym
     */
    public function acronym($acronym)
    {
        $this->cdata($acronym);
    }

    /**
     * Format a smiley
     *
     * The smiley is added to the abstract as its plain text source.
     *
     * @param string $smiley
     */
    public function smiley($smiley)
    {
        $this->cdata($smiley);
    }

    /**
     * Format an entity
     *
     * Entities are basically small text replacements. Here the entity is
     * added to the abstract as given, no replacement is done.
     *
     * @param string $entity
     */
    public function entity($entity)
    {
        $this->cdata($entity);
    }

    /**
     * Typographically format a multiply sign
     *
     * Example: ($x=640, $y=480) should result in "640×480"
     *
     * @param string|int $x first value
     * @param string|int $y second value
     */
    public function multiplyentity($x, $y)
    {
        $this->cdata($x . '×' . $y);
    }

    /**
     * Render an opening single quote char (language specific)
     */
    public function singlequoteopening()
    {
        global $lang;
        $this->cdata($lang['singlequoteopening']);
    }

    /**
     * Render a closing single quote char (language specific)
     */
    public function singlequoteclosing()
    {
        global $lang;
        $this->cdata($lang['singlequoteclosing']);
    }

    /**
     * Render an apostrophe char (language specific)
     */
    public function apostrophe()
    {
        global $lang;
        $this->cdata($lang['apostrophe']);
    }

    /**
     * Render an opening double quote char (language specific)
     */
    public function doublequoteopening()
    {
        global $lang;
        $this->cdata($lang['doublequoteopening']);
    }

    /**
     * Render a closing double quote char (language specific)
     */
    public function doublequoteclosing()
    {
        global $lang;
        $this->cdata($lang['doublequoteclosing']);
    }

    /**
     * Render a CamelCase link
     *
     * @param string $link The link name
     * @see http://en.wikipedia.org/wiki/CamelCase
     */
    public function camelcaselink($link)
    {
        $this->internallink($link, $link);
    }

    /**
     * Render a page local link
     *
     * Nothing is added to the abstract, only media used as link title is tracked.
     *
     * @param string $hash hash link identifier
     * @param string|array|null $name name for the link, array for media file
     */
    public function locallink($hash, $name = null)
    {
        $this->trackTitleMedia($name);
    }

    /**
     * keep track of internal links in $this->meta['relation']['references']
     *
     * @param string $id page ID to link to. eg. 'wiki:syntax'
     * @param string|array|null $name name for the link, array for media file
     */
    public function internallink($id, $name = null)
    {
        global $ID;

        $this->trackTitleMedia($name);

        // drop anything after the first '?' from the link target
        [$id] = explode('?', $id, 2);

        $default = $this->_simpleTitle($id);

        // first resolve and clean up the $id
        $resolver = new PageResolver($ID);
        $id = $resolver->resolveId($id);
        [$page] = sexplode('#', $id, 2);

        // set metadata
        $this->meta['relation']['references'][$page] = page_exists($page);

        // add link title to summary
        if ($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, $default, $id);
        }
    }

    /**
     * Render an external link
     *
     * @param string $url full URL with scheme
     * @param string|array|null $name name for the link, array for media file
     */
    public function externallink($url, $name = null)
    {
        $this->trackTitleMedia($name);

        if ($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
        }
    }

    /**
     * Render an interwiki link
     *
     * @param string $match original link - probably not much use
     * @param string|array $name name for the link, array for media file
     * @param string $wikiName identifier (shortcut) for the remote wiki
     * @param string $wikiUri the fragment parsed from the original link
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri)
    {
        $this->trackTitleMedia($name);

        if ($this->capture) {
            // the default title is the URI without any hash part
            [$wikiUri] = explode('#', $wikiUri, 2);
            $this->doc .= $this->_getLinkTitle($name, $wikiUri);
        }
    }

    /**
     * Link to windows share
     *
     * @param string $url the link
     * @param string|array|null $name name for the link, array for media file
     */
    public function windowssharelink($url, $name = null)
    {
        $this->trackTitleMedia($name);

        if ($this->capture) {
            // _getLinkTitle() also copes with media arrays, which must not be
            // concatenated to the abstract directly
            $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
        }
    }

    /**
     * Render a linked E-Mail Address
     *
     * The address is added to the abstract as is, no mail guard is applied here.
     *
     * @param string $address Email-Address
     * @param string|array|null $name name for the link, array for media file
     */
    public function emaillink($address, $name = null)
    {
        $this->trackTitleMedia($name);

        if ($this->capture) {
            // _getLinkTitle() also copes with media arrays, which must not be
            // concatenated to the abstract directly
            $this->doc .= $this->_getLinkTitle($name, '<' . $address . '>');
        }
    }

    /**
     * Render an internal media file
     *
     * Adds the title to the abstract and records the media usage.
     *
     * @param string $src media ID
     * @param string $title descriptive text
     * @param string $align left|center|right
     * @param int $width width of media in pixel
     * @param int $height height of media in pixel
     * @param string $cache cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function internalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null
    ) {
        if ($this->capture && $title) {
            $this->doc .= '[' . $title . ']';
        }
        $this->_firstimage($src);
        $this->_recordMediaUsage($src);
    }

    /**
     * Render an external media file
     *
     * Adds the title to the abstract; external media is not recorded as used media.
     *
     * @param string $src full media URL
     * @param string $title descriptive text
     * @param string $align left|center|right
     * @param int $width width of media in pixel
     * @param int $height height of media in pixel
     * @param string $cache cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function externalmedia(
        $src,
        $title = null,
        $align = null,
        $width = null,
        $height = null,
        $cache = null,
        $linking = null
    ) {
        if ($this->capture && $title) {
            $this->doc .= '[' . $title . ']';
        }
        $this->_firstimage($src);
    }

    /**
     * Render the output of an RSS feed
     *
     * Records the feed as part of the page and lowers the validity age of the
     * metadata to the smallest refresh interval of all feeds seen so far.
     *
     * @param string $url URL of the feed
     * @param array $params Finetuning of the output
     */
    public function rss($url, $params)
    {
        $this->meta['relation']['haspart'][$url] = true;

        // without a refresh interval there is nothing to update; in particular an
        // already recorded age must not be overwritten with an empty value
        if (!isset($params['refresh'])) {
            return;
        }

        $refresh = $params['refresh'];
        if (isset($this->meta['date']['valid']['age'])) {
            $refresh = min($this->meta['date']['valid']['age'], $refresh);
        }
        $this->meta['date']['valid']['age'] = $refresh;
    }

    #region Utils

    /**
     * Track a media file that is used as the title of a link
     *
     * Remembers the media as first image candidate and, for internal media,
     * records its usage. Anything that is not a media array is ignored.
     *
     * @param string|array|null $name name for the link, array for media file
     */
    protected function trackTitleMedia($name)
    {
        if (!is_array($name)) {
            return;
        }

        $this->_firstimage($name['src']);
        if ($name['type'] === 'internalmedia') {
            $this->_recordMediaUsage($name['src']);
        }
    }

    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * @param string $name
     *
     * @return mixed|string empty string when an array is given
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     */
    public function _simpleTitle($name)
    {
        global $conf;

        if (is_array($name)) {
            return '';
        }

        // a slash only separates namespaces when the useslash option is set
        $nssep = $conf['useslash'] ? '[:;/]' : '[:;]';
        $name = preg_replace('!.*' . $nssep . '!', '', $name);

        //if there is a hash we use the anchor name only
        return preg_replace('!.*#!', '', $name);
    }

    /**
     * Construct a title and handle images in titles
     *
     * @param string|array|null $title either string title or media array
     * @param string $default default title if nothing else is found
     * @param null|string $id linked page id (used to extract title from first heading)
     * @return string title text
     * @author Harry Fuecks <hfuecks@gmail.com>
     */
    public function _getLinkTitle($title, $default, $id = null)
    {
        // media as title: use its description in brackets if there is one
        if (is_array($title)) {
            return empty($title['title']) ? $default : '[' . $title['title'] . ']';
        }

        // explicit, non-blank title
        if (!is_null($title) && trim($title) !== '') {
            return $title;
        }

        // no title given: try the first heading of the linked page
        if (useHeading('content') && $id) {
            $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
            if ($heading) {
                return $heading;
            }
        }

        return $default;
    }

    /**
     * Remember first image
     *
     * Only the first source with a known image file extension is kept.
     *
     * @param string $src image URL or ID
     */
    protected function _firstimage($src)
    {
        global $ID;

        if ($this->firstimage) {
            return;
        }

        [$src] = explode('#', $src, 2);
        if (!media_isexternal($src)) {
            $src = (new MediaResolver($ID))->resolveId($src);
        }

        // the dot has to be escaped, otherwise any name merely ending in
        // e.g. "png" would be accepted as image
        if (preg_match('/\.(jpe?g|gif|png|webp|svg)$/i', $src)) {
            $this->firstimage = $src;
        }
    }

    /**
     * Store list of used media files in metadata
     *
     * External media is ignored. The stored value tells if the file exists.
     *
     * @param string $src media ID
     */
    protected function _recordMediaUsage($src)
    {
        global $ID;

        [$src] = explode('#', $src, 2);
        if (media_isexternal($src)) {
            return;
        }

        $src = (new MediaResolver($ID))->resolveId($src);
        $this->meta['relation']['media'][$src] = file_exists(mediaFN($src));
    }

    #endregion
}
771 
772 //Setup VIM: ex: et ts=4 :
773