1<?php
2
3use dokuwiki\File\MediaResolver;
4use dokuwiki\File\PageResolver;
5use dokuwiki\Utf8\PhpString;
6
7/**
8 * The MetaData Renderer
9 *
10 * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
 * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
12 * $persistent.
13 *
14 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
15 *
16 * @author Esther Brunner <wikidesign@gmail.com>
17 */
18class Doku_Renderer_metadata extends Doku_Renderer
19{
    /** the approximate byte length to capture for the abstract */
21    public const ABSTRACT_LEN = 250;
22
23    /** the maximum UTF8 character length for the abstract */
24    public const ABSTRACT_MAX = 500;
25
26    /** @var array transient meta data, will be reset on each rendering */
27    public $meta = [];
28
29    /** @var array persistent meta data, will be kept until explicitly deleted */
30    public $persistent = [];
31
32    /** @var array the list of headers used to create unique link ids */
33    protected $headers = [];
34
35    /** @var string temporary $doc store */
36    protected $store = '';
37
38    /** @var string keeps the first image reference */
39    protected $firstimage = '';
40
41    /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
42    public $capturing = true;
43
44    /** @var bool determines if enough data for the abstract was collected, yet */
45    public $capture = true;
46
47    /** @var int number of bytes captured for abstract */
48    protected $captured = 0;
49
50    /**
51     * Returns the format produced by this renderer.
52     *
53     * @return string always 'metadata'
54     */
55    public function getFormat()
56    {
57        return 'metadata';
58    }
59
60    /**
61     * Initialize the document
62     *
63     * Sets up some of the persistent info about the page if it doesn't exist, yet.
64     */
65    public function document_start()
66    {
67        global $ID;
68
69        $this->headers = [];
70
71        // external pages are missing create date
72        if (!isset($this->persistent['date']['created']) || !$this->persistent['date']['created']) {
73            $this->persistent['date']['created'] = filectime(wikiFN($ID));
74        }
75        if (!isset($this->persistent['user'])) {
76            $this->persistent['user'] = '';
77        }
78        if (!isset($this->persistent['creator'])) {
79            $this->persistent['creator'] = '';
80        }
81        // reset metadata to persistent values
82        $this->meta = $this->persistent;
83    }
84
85    /**
86     * Finalize the document
87     *
88     * Stores collected data in the metadata
89     */
90    public function document_end()
91    {
92        global $ID;
93
94        // store internal info in metadata (notoc,nocache)
95        $this->meta['internal'] = $this->info;
96
97        if (!isset($this->meta['description']['abstract'])) {
98            // cut off too long abstracts
99            $this->doc = trim($this->doc);
100            if (strlen($this->doc) > self::ABSTRACT_MAX) {
101                $this->doc = PhpString::substr($this->doc, 0, self::ABSTRACT_MAX) . '…';
102            }
103            $this->meta['description']['abstract'] = $this->doc;
104        }
105
106        $this->meta['relation']['firstimage'] = $this->firstimage;
107
108        if (!isset($this->meta['date']['modified'])) {
109            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
110        }
111    }
112
113    /**
114     * Render plain text data
115     *
116     * This function takes care of the amount captured data and will stop capturing when
117     * enough abstract data is available
118     *
119     * @param $text
120     */
121    public function cdata($text)
122    {
123        if (!$this->capture || !$this->capturing) {
124            return;
125        }
126
127        $this->doc .= $text;
128
129        $this->captured += strlen($text);
130        if ($this->captured > self::ABSTRACT_LEN) {
131            $this->capture = false;
132        }
133    }
134
135    /**
136     * Add an item to the TOC
137     *
138     * @param string $id the hash link
139     * @param string $text the text to display
140     * @param int $level the nesting level
141     */
142    public function toc_additem($id, $text, $level)
143    {
144        global $conf;
145
146        //only add items within configured levels
147        if ($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
148            // the TOC is one of our standard ul list arrays ;-)
149            $this->meta['description']['tableofcontents'][] = [
150                'hid' => $id,
151                'title' => $text,
152                'type' => 'ul',
153                'level' => $level - $conf['toptoclevel'] + 1
154            ];
155        }
156    }
157
158    /**
159     * Render a heading
160     *
161     * @param string $text the text to display
162     * @param int $level header level
163     * @param int $pos byte position in the original source
164     */
165    public function header($text, $level, $pos)
166    {
167        if (!isset($this->meta['title'])) {
168            $this->meta['title'] = $text;
169        }
170
171        // add the header to the TOC
172        $hid = $this->_headerToLink($text, true);
173        $this->toc_additem($hid, $text, $level);
174
175        // add to summary
176        $this->cdata(DOKU_LF . $text . DOKU_LF);
177    }
178
179    /**
180     * Open a paragraph
181     */
182    public function p_open()
183    {
184        $this->cdata(DOKU_LF);
185    }
186
187    /**
188     * Close a paragraph
189     */
190    public function p_close()
191    {
192        $this->cdata(DOKU_LF);
193    }
194
195    /**
196     * Create a line break
197     */
198    public function linebreak()
199    {
200        $this->cdata(DOKU_LF);
201    }
202
203    /**
204     * Create a horizontal line
205     */
206    public function hr()
207    {
208        $this->cdata(DOKU_LF . '----------' . DOKU_LF);
209    }
210
211    /**
212     * Callback for footnote start syntax
213     *
214     * All following content will go to the footnote instead of
215     * the document. To achieve this the previous rendered content
216     * is moved to $store and $doc is cleared
217     *
218     * @author Andreas Gohr <andi@splitbrain.org>
219     */
220    public function footnote_open()
221    {
222        if ($this->capture) {
223            // move current content to store
224            // this is required to ensure safe behaviour of plugins accessed within footnotes
225            $this->store = $this->doc;
226            $this->doc = '';
227
228            // disable capturing
229            $this->capturing = false;
230        }
231    }
232
233    /**
234     * Callback for footnote end syntax
235     *
236     * All content rendered whilst within footnote syntax mode is discarded,
237     * the previously rendered content is restored and capturing is re-enabled.
238     *
239     * @author Andreas Gohr
240     */
241    public function footnote_close()
242    {
243        if ($this->capture) {
244            // re-enable capturing
245            $this->capturing = true;
246            // restore previously rendered content
247            $this->doc = $this->store;
248            $this->store = '';
249        }
250    }
251
252    /**
253     * Open an unordered list
254     */
255    public function listu_open()
256    {
257        $this->cdata(DOKU_LF);
258    }
259
260    /**
261     * Open an ordered list
262     */
263    public function listo_open()
264    {
265        $this->cdata(DOKU_LF);
266    }
267
268    /**
269     * Open a list item
270     *
271     * @param int $level the nesting level
272     * @param bool $node true when a node; false when a leaf
273     */
274    public function listitem_open($level, $node = false)
275    {
276        $this->cdata(str_repeat(DOKU_TAB, $level) . '* ');
277    }
278
279    /**
280     * Close a list item
281     */
282    public function listitem_close()
283    {
284        $this->cdata(DOKU_LF);
285    }
286
287    /**
288     * Output preformatted text
289     *
290     * @param string $text
291     */
292    public function preformatted($text)
293    {
294        $this->cdata($text);
295    }
296
297    /**
298     * Start a block quote
299     */
300    public function quote_open()
301    {
302        $this->cdata(DOKU_LF . DOKU_TAB . '"');
303    }
304
305    /**
306     * Stop a block quote
307     */
308    public function quote_close()
309    {
310        $this->cdata('"' . DOKU_LF);
311    }
312
313    /**
314     * Display text as file content, optionally syntax highlighted
315     *
316     * @param string $text text to show
317     * @param string $lang programming language to use for syntax highlighting
318     * @param string $file file path label
319     */
320    public function file($text, $lang = null, $file = null)
321    {
322        $this->cdata(DOKU_LF . $text . DOKU_LF);
323    }
324
325    /**
326     * Display text as code content, optionally syntax highlighted
327     *
328     * @param string $text text to show
329     * @param string $language programming language to use for syntax highlighting
330     * @param string $file file path label
331     */
332    public function code($text, $language = null, $file = null)
333    {
334        $this->cdata(DOKU_LF . $text . DOKU_LF);
335    }
336
337    /**
338     * Format an acronym
339     *
340     * Uses $this->acronyms
341     *
342     * @param string $acronym
343     */
344    public function acronym($acronym)
345    {
346        $this->cdata($acronym);
347    }
348
349    /**
350     * Format a smiley
351     *
352     * Uses $this->smiley
353     *
354     * @param string $smiley
355     */
356    public function smiley($smiley)
357    {
358        $this->cdata($smiley);
359    }
360
361    /**
362     * Format an entity
363     *
364     * Entities are basically small text replacements
365     *
366     * Uses $this->entities
367     *
368     * @param string $entity
369     */
370    public function entity($entity)
371    {
372        $this->cdata($entity);
373    }
374
375    /**
376     * Typographically format a multiply sign
377     *
378     * Example: ($x=640, $y=480) should result in "640×480"
379     *
380     * @param string|int $x first value
381     * @param string|int $y second value
382     */
383    public function multiplyentity($x, $y)
384    {
385        $this->cdata($x . '×' . $y);
386    }
387
388    /**
389     * Render an opening single quote char (language specific)
390     */
391    public function singlequoteopening()
392    {
393        global $lang;
394        $this->cdata($lang['singlequoteopening']);
395    }
396
397    /**
398     * Render a closing single quote char (language specific)
399     */
400    public function singlequoteclosing()
401    {
402        global $lang;
403        $this->cdata($lang['singlequoteclosing']);
404    }
405
406    /**
407     * Render an apostrophe char (language specific)
408     */
409    public function apostrophe()
410    {
411        global $lang;
412        $this->cdata($lang['apostrophe']);
413    }
414
415    /**
416     * Render an opening double quote char (language specific)
417     */
418    public function doublequoteopening()
419    {
420        global $lang;
421        $this->cdata($lang['doublequoteopening']);
422    }
423
424    /**
425     * Render an closinging double quote char (language specific)
426     */
427    public function doublequoteclosing()
428    {
429        global $lang;
430        $this->cdata($lang['doublequoteclosing']);
431    }
432
433    /**
434     * Render a CamelCase link
435     *
436     * @param string $link The link name
437     * @see http://en.wikipedia.org/wiki/CamelCase
438     */
439    public function camelcaselink($link)
440    {
441        $this->internallink($link, $link);
442    }
443
444    /**
445     * Render a page local link
446     *
447     * @param string $hash hash link identifier
448     * @param string $name name for the link
449     */
450    public function locallink($hash, $name = null)
451    {
452        if (is_array($name)) {
453            $this->_firstimage($name['src']);
454            if ($name['type'] == 'internalmedia') {
455                $this->_recordMediaUsage($name['src']);
456            }
457        }
458    }
459
460    /**
461     * keep track of internal links in $this->meta['relation']['references']
462     *
463     * @param string $id page ID to link to. eg. 'wiki:syntax'
464     * @param string|array|null $name name for the link, array for media file
465     */
466    public function internallink($id, $name = null)
467    {
468        global $ID;
469
470        if (is_array($name)) {
471            $this->_firstimage($name['src']);
472            if ($name['type'] == 'internalmedia') {
473                $this->_recordMediaUsage($name['src']);
474            }
475        }
476
477        $parts = explode('?', $id, 2);
478        if (count($parts) === 2) {
479            $id = $parts[0];
480        }
481
482        $default = $this->_simpleTitle($id);
483
484        // first resolve and clean up the $id
485        $resolver = new PageResolver($ID);
486        $id = $resolver->resolveId($id);
487        [$page] = sexplode('#', $id, 2);
488
489        // set metadata
490        $this->meta['relation']['references'][$page] = page_exists($page);
491        // $data = array('relation' => array('isreferencedby' => array($ID => true)));
492        // p_set_metadata($id, $data);
493
494        // add link title to summary
495        if ($this->capture) {
496            $name = $this->_getLinkTitle($name, $default, $id);
497            $this->doc .= $name;
498        }
499    }
500
501    /**
502     * Render an external link
503     *
504     * @param string $url full URL with scheme
505     * @param string|array|null $name name for the link, array for media file
506     */
507    public function externallink($url, $name = null)
508    {
509        if (is_array($name)) {
510            $this->_firstimage($name['src']);
511            if ($name['type'] == 'internalmedia') {
512                $this->_recordMediaUsage($name['src']);
513            }
514        }
515
516        if ($this->capture) {
517            $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
518        }
519    }
520
521    /**
522     * Render an interwiki link
523     *
524     * You may want to use $this->_resolveInterWiki() here
525     *
526     * @param string $match original link - probably not much use
527     * @param string|array $name name for the link, array for media file
528     * @param string $wikiName indentifier (shortcut) for the remote wiki
529     * @param string $wikiUri the fragment parsed from the original link
530     */
531    public function interwikilink($match, $name, $wikiName, $wikiUri)
532    {
533        if (is_array($name)) {
534            $this->_firstimage($name['src']);
535            if ($name['type'] == 'internalmedia') {
536                $this->_recordMediaUsage($name['src']);
537            }
538        }
539
540        if ($this->capture) {
541            [$wikiUri] = explode('#', $wikiUri, 2);
542            $name = $this->_getLinkTitle($name, $wikiUri);
543            $this->doc .= $name;
544        }
545    }
546
547    /**
548     * Link to windows share
549     *
550     * @param string $url the link
551     * @param string|array $name name for the link, array for media file
552     */
553    public function windowssharelink($url, $name = null)
554    {
555        if (is_array($name)) {
556            $this->_firstimage($name['src']);
557            if ($name['type'] == 'internalmedia') {
558                $this->_recordMediaUsage($name['src']);
559            }
560        }
561
562        if ($this->capture) {
563            if ($name) {
564                $this->doc .= $name;
565            } else {
566                $this->doc .= '<' . $url . '>';
567            }
568        }
569    }
570
571    /**
572     * Render a linked E-Mail Address
573     *
574     * Should honor $conf['mailguard'] setting
575     *
576     * @param string $address Email-Address
577     * @param string|array $name name for the link, array for media file
578     */
579    public function emaillink($address, $name = null)
580    {
581        if (is_array($name)) {
582            $this->_firstimage($name['src']);
583            if ($name['type'] == 'internalmedia') {
584                $this->_recordMediaUsage($name['src']);
585            }
586        }
587
588        if ($this->capture) {
589            if ($name) {
590                $this->doc .= $name;
591            } else {
592                $this->doc .= '<' . $address . '>';
593            }
594        }
595    }
596
597    /**
598     * Render an internal media file
599     *
600     * @param string $src media ID
601     * @param string $title descriptive text
602     * @param string $align left|center|right
603     * @param int $width width of media in pixel
604     * @param int $height height of media in pixel
605     * @param string $cache cache|recache|nocache
606     * @param string $linking linkonly|detail|nolink
607     */
608    public function internalmedia(
609        $src,
610        $title = null,
611        $align = null,
612        $width = null,
613        $height = null,
614        $cache = null,
615        $linking = null
616    ) {
617        if ($this->capture && $title) {
618            $this->doc .= '[' . $title . ']';
619        }
620        $this->_firstimage($src);
621        $this->_recordMediaUsage($src);
622    }
623
624    /**
625     * Render an external media file
626     *
627     * @param string $src full media URL
628     * @param string $title descriptive text
629     * @param string $align left|center|right
630     * @param int $width width of media in pixel
631     * @param int $height height of media in pixel
632     * @param string $cache cache|recache|nocache
633     * @param string $linking linkonly|detail|nolink
634     */
635    public function externalmedia(
636        $src,
637        $title = null,
638        $align = null,
639        $width = null,
640        $height = null,
641        $cache = null,
642        $linking = null
643    ) {
644        if ($this->capture && $title) {
645            $this->doc .= '[' . $title . ']';
646        }
647        $this->_firstimage($src);
648    }
649
650    /**
651     * Render the output of an RSS feed
652     *
653     * @param string $url URL of the feed
654     * @param array $params Finetuning of the output
655     */
656    public function rss($url, $params)
657    {
658        $this->meta['relation']['haspart'][$url] = true;
659
660        $this->meta['date']['valid']['age'] =
661            isset($this->meta['date']['valid']['age']) ?
662                min($this->meta['date']['valid']['age'], $params['refresh']) :
663                $params['refresh'];
664    }
665
666    #region Utils
667
668    /**
669     * Removes any Namespace from the given name but keeps
670     * casing and special chars
671     *
672     * @param string $name
673     *
674     * @return mixed|string
675     * @author Andreas Gohr <andi@splitbrain.org>
676     *
677     */
678    public function _simpleTitle($name)
679    {
680        global $conf;
681
682        if (is_array($name)) {
683            return '';
684        }
685
686        if ($conf['useslash']) {
687            $nssep = '[:;/]';
688        } else {
689            $nssep = '[:;]';
690        }
691        $name = preg_replace('!.*' . $nssep . '!', '', $name);
692        //if there is a hash we use the anchor name only
693        $name = preg_replace('!.*#!', '', $name);
694        return $name;
695    }
696
697    /**
698     * Construct a title and handle images in titles
699     *
700     * @param string|array|null $title either string title or media array
701     * @param string $default default title if nothing else is found
702     * @param null|string $id linked page id (used to extract title from first heading)
703     * @return string title text
704     * @author Harry Fuecks <hfuecks@gmail.com>
705     */
706    public function _getLinkTitle($title, $default, $id = null)
707    {
708        if (is_array($title)) {
709            if ($title['title']) {
710                return '[' . $title['title'] . ']';
711            } else {
712                return $default;
713            }
714        } elseif (is_null($title) || trim($title) == '') {
715            if (useHeading('content') && $id) {
716                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
717                if ($heading) {
718                    return $heading;
719                }
720            }
721            return $default;
722        } else {
723            return $title;
724        }
725    }
726
727    /**
728     * Remember first image
729     *
730     * @param string $src image URL or ID
731     */
732    protected function _firstimage($src)
733    {
734        global $ID;
735
736        if ($this->firstimage) {
737            return;
738        }
739
740        [$src] = explode('#', $src, 2);
741        if (!media_isexternal($src)) {
742            $src = (new MediaResolver($ID))->resolveId($src);
743        }
744        if (preg_match('/.(jpe?g|gif|png|webp|svg)$/i', $src)) {
745            $this->firstimage = $src;
746        }
747    }
748
749    /**
750     * Store list of used media files in metadata
751     *
752     * @param string $src media ID
753     */
754    protected function _recordMediaUsage($src)
755    {
756        global $ID;
757
758        [$src] = explode('#', $src, 2);
759        if (media_isexternal($src)) {
760            return;
761        }
762        $src = (new MediaResolver($ID))->resolveId($src);
763        $file = mediaFN($src);
764        $this->meta['relation']['media'][$src] = file_exists($file);
765    }
766
767    #endregion
768}
769
770//Setup VIM: ex: et ts=4 :
771