xref: /dokuwiki/inc/parser/metadata.php (revision bf6e4f0d2bea6ff572294f3280faef71d44e0917)
1<?php
2
3use dokuwiki\File\MediaResolver;
4use dokuwiki\File\PageResolver;
5use dokuwiki\Utf8\PhpString;
6
7/**
8 * The MetaData Renderer
9 *
10 * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
11 * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
12 * $persistent.
13 *
14 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
15 *
16 * @author Esther Brunner <wikidesign@gmail.com>
17 */
18class Doku_Renderer_metadata extends Doku_Renderer
19{
20    /** the approximate byte length to capture for the abstract */
21    public const ABSTRACT_LEN = 250;
22
23    /** the maximum UTF8 character length for the abstract */
24    public const ABSTRACT_MAX = 500;
25
26    /** @var array transient meta data, will be reset on each rendering */
27    public $meta = [];
28
29    /** @var array persistent meta data, will be kept until explicitly deleted */
30    public $persistent = [];
31
32    /** @var array the list of headers used to create unique link ids */
33    protected $headers = [];
34
35    /** @var string temporary $doc store */
36    protected $store = '';
37
38    /** @var string keeps the first image reference */
39    protected $firstimage = '';
40
41    /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
42    public $capturing = true;
43
44    /** @var bool determines if enough data for the abstract was collected, yet */
45    public $capture = true;
46
47    /** @var int number of bytes captured for abstract */
48    protected $captured = 0;
49
50    /**
51     * Returns the format produced by this renderer.
52     *
53     * @return string always 'metadata'
54     */
55    public function getFormat()
56    {
57        return 'metadata';
58    }
59
60    /**
61     * Initialize the document
62     *
63     * Sets up some of the persistent info about the page if it doesn't exist, yet.
64     */
65    public function document_start()
66    {
67        global $ID;
68
69        $this->headers = [];
70
71        // external pages are missing create date
72        if (!isset($this->persistent['date']['created']) || !$this->persistent['date']['created']) {
73            $this->persistent['date']['created'] = filectime(wikiFN($ID));
74        }
75        if (!isset($this->persistent['user'])) {
76            $this->persistent['user'] = '';
77        }
78        if (!isset($this->persistent['creator'])) {
79            $this->persistent['creator'] = '';
80        }
81        // reset metadata to persistent values
82        $this->meta = $this->persistent;
83    }
84
85    /**
86     * Finalize the document
87     *
88     * Stores collected data in the metadata
89     */
90    public function document_end()
91    {
92        global $ID;
93
94        // store internal info in metadata (notoc,nocache)
95        $this->meta['internal'] = $this->info;
96
97        if (!isset($this->meta['description']['abstract'])) {
98            // cut off too long abstracts
99            $this->doc = trim($this->doc);
100            if (strlen($this->doc) > self::ABSTRACT_MAX) {
101                $this->doc = PhpString::substr($this->doc, 0, self::ABSTRACT_MAX) . '…';
102            }
103            $this->meta['description']['abstract'] = $this->doc;
104        }
105
106        $this->meta['relation']['firstimage'] = $this->firstimage;
107
108        if (!isset($this->meta['date']['modified'])) {
109            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
110        }
111
112        $this->doc = '';
113    }
114
115    /**
116     * Render plain text data
117     *
118     * This function takes care of the amount captured data and will stop capturing when
119     * enough abstract data is available
120     *
121     * @param $text
122     */
123    public function cdata($text)
124    {
125        if (!$this->capture || !$this->capturing) {
126            return;
127        }
128
129        $this->doc .= $text;
130
131        $this->captured += strlen($text);
132        if ($this->captured > self::ABSTRACT_LEN) {
133            $this->capture = false;
134        }
135    }
136
137    /**
138     * Add an item to the TOC
139     *
140     * @param string $id the hash link
141     * @param string $text the text to display
142     * @param int $level the nesting level
143     */
144    public function toc_additem($id, $text, $level)
145    {
146        global $conf;
147
148        //only add items within configured levels
149        if ($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
150            // the TOC is one of our standard ul list arrays ;-)
151            $this->meta['description']['tableofcontents'][] = [
152                'hid' => $id,
153                'title' => $text,
154                'type' => 'ul',
155                'level' => $level - $conf['toptoclevel'] + 1
156            ];
157        }
158    }
159
160    /**
161     * Render a heading
162     *
163     * @param string $text the text to display
164     * @param int $level header level
165     * @param int $pos byte position in the original source
166     */
167    public function header($text, $level, $pos)
168    {
169        if (!isset($this->meta['title'])) {
170            $this->meta['title'] = $text;
171        }
172
173        // add the header to the TOC
174        $hid = $this->_headerToLink($text, true);
175        $this->toc_additem($hid, $text, $level);
176
177        // add to summary
178        $this->cdata(DOKU_LF . $text . DOKU_LF);
179    }
180
181    /**
182     * Open a paragraph
183     */
184    public function p_open()
185    {
186        $this->cdata(DOKU_LF);
187    }
188
189    /**
190     * Close a paragraph
191     */
192    public function p_close()
193    {
194        $this->cdata(DOKU_LF);
195    }
196
197    /**
198     * Create a line break
199     */
200    public function linebreak()
201    {
202        $this->cdata(DOKU_LF);
203    }
204
205    /**
206     * Create a horizontal line
207     */
208    public function hr()
209    {
210        $this->cdata(DOKU_LF . '----------' . DOKU_LF);
211    }
212
213    /**
214     * Callback for footnote start syntax
215     *
216     * All following content will go to the footnote instead of
217     * the document. To achieve this the previous rendered content
218     * is moved to $store and $doc is cleared
219     *
220     * @author Andreas Gohr <andi@splitbrain.org>
221     */
222    public function footnote_open()
223    {
224        if ($this->capture) {
225            // move current content to store
226            // this is required to ensure safe behaviour of plugins accessed within footnotes
227            $this->store = $this->doc;
228            $this->doc = '';
229
230            // disable capturing
231            $this->capturing = false;
232        }
233    }
234
235    /**
236     * Callback for footnote end syntax
237     *
238     * All content rendered whilst within footnote syntax mode is discarded,
239     * the previously rendered content is restored and capturing is re-enabled.
240     *
241     * @author Andreas Gohr
242     */
243    public function footnote_close()
244    {
245        if ($this->capture) {
246            // re-enable capturing
247            $this->capturing = true;
248            // restore previously rendered content
249            $this->doc = $this->store;
250            $this->store = '';
251        }
252    }
253
254    /**
255     * Open an unordered list
256     */
257    public function listu_open()
258    {
259        $this->cdata(DOKU_LF);
260    }
261
262    /**
263     * Open an ordered list
264     *
265     * @param string|string[]|null $classes Optional CSS classes (ignored by metadata)
266     * @param int $start Starting number (ignored by metadata)
267     */
268    public function listo_open($classes = null, $start = 1)
269    {
270        $this->cdata(DOKU_LF);
271    }
272
273    /**
274     * Open a list item
275     *
276     * @param int $level the nesting level
277     * @param bool $node true when a node; false when a leaf
278     */
279    public function listitem_open($level, $node = false)
280    {
281        $this->cdata(str_repeat(DOKU_TAB, $level) . '* ');
282    }
283
284    /**
285     * Close a list item
286     */
287    public function listitem_close()
288    {
289        $this->cdata(DOKU_LF);
290    }
291
292    /**
293     * Output preformatted text
294     *
295     * @param string $text
296     */
297    public function preformatted($text)
298    {
299        $this->cdata($text);
300    }
301
302    /**
303     * Start a block quote
304     */
305    public function quote_open()
306    {
307        $this->cdata(DOKU_LF . DOKU_TAB . '"');
308    }
309
310    /**
311     * Stop a block quote
312     */
313    public function quote_close()
314    {
315        $this->cdata('"' . DOKU_LF);
316    }
317
318    /**
319     * Display text as file content, optionally syntax highlighted
320     *
321     * @param string $text text to show
322     * @param string $lang programming language to use for syntax highlighting
323     * @param string $file file path label
324     */
325    public function file($text, $lang = null, $file = null)
326    {
327        $this->cdata(DOKU_LF . $text . DOKU_LF);
328    }
329
330    /**
331     * Display text as code content, optionally syntax highlighted
332     *
333     * @param string $text text to show
334     * @param string $language programming language to use for syntax highlighting
335     * @param string $file file path label
336     */
337    public function code($text, $language = null, $file = null)
338    {
339        $this->cdata(DOKU_LF . $text . DOKU_LF);
340    }
341
342    /**
343     * Format an acronym
344     *
345     * Uses $this->acronyms
346     *
347     * @param string $acronym
348     */
349    public function acronym($acronym)
350    {
351        $this->cdata($acronym);
352    }
353
354    /**
355     * Format a smiley
356     *
357     * Uses $this->smiley
358     *
359     * @param string $smiley
360     */
361    public function smiley($smiley)
362    {
363        $this->cdata($smiley);
364    }
365
366    /**
367     * Format an entity
368     *
369     * Entities are basically small text replacements
370     *
371     * Uses $this->entities
372     *
373     * @param string $entity
374     */
375    public function entity($entity)
376    {
377        $this->cdata($entity);
378    }
379
380    /**
381     * Typographically format a multiply sign
382     *
383     * Example: ($x=640, $y=480) should result in "640×480"
384     *
385     * @param string|int $x first value
386     * @param string|int $y second value
387     */
388    public function multiplyentity($x, $y)
389    {
390        $this->cdata($x . '×' . $y);
391    }
392
393    /**
394     * Render an opening single quote char (language specific)
395     */
396    public function singlequoteopening()
397    {
398        global $lang;
399        $this->cdata($lang['singlequoteopening']);
400    }
401
402    /**
403     * Render a closing single quote char (language specific)
404     */
405    public function singlequoteclosing()
406    {
407        global $lang;
408        $this->cdata($lang['singlequoteclosing']);
409    }
410
411    /**
412     * Render an apostrophe char (language specific)
413     */
414    public function apostrophe()
415    {
416        global $lang;
417        $this->cdata($lang['apostrophe']);
418    }
419
420    /**
421     * Render an opening double quote char (language specific)
422     */
423    public function doublequoteopening()
424    {
425        global $lang;
426        $this->cdata($lang['doublequoteopening']);
427    }
428
429    /**
430     * Render an closinging double quote char (language specific)
431     */
432    public function doublequoteclosing()
433    {
434        global $lang;
435        $this->cdata($lang['doublequoteclosing']);
436    }
437
438    /**
439     * Render a CamelCase link
440     *
441     * @param string $link The link name
442     * @see http://en.wikipedia.org/wiki/CamelCase
443     */
444    public function camelcaselink($link)
445    {
446        $this->internallink($link, $link);
447    }
448
449    /**
450     * Render a page local link
451     *
452     * @param string $hash hash link identifier
453     * @param string $name name for the link
454     */
455    public function locallink($hash, $name = null)
456    {
457        if (is_array($name)) {
458            $this->_firstimage($name['src']);
459            if ($name['type'] == 'internalmedia') {
460                $this->_recordMediaUsage($name['src']);
461            }
462        }
463    }
464
465    /**
466     * keep track of internal links in $this->meta['relation']['references']
467     *
468     * @param string $id page ID to link to. eg. 'wiki:syntax'
469     * @param string|array|null $name name for the link, array for media file
470     */
471    public function internallink($id, $name = null)
472    {
473        global $ID;
474
475        if (is_array($name)) {
476            $this->_firstimage($name['src']);
477            if ($name['type'] == 'internalmedia') {
478                $this->_recordMediaUsage($name['src']);
479            }
480        }
481
482        $parts = explode('?', $id, 2);
483        if (count($parts) === 2) {
484            $id = $parts[0];
485        }
486
487        $default = $this->_simpleTitle($id);
488
489        // first resolve and clean up the $id
490        $resolver = new PageResolver($ID);
491        $id = $resolver->resolveId($id);
492        [$page] = sexplode('#', $id, 2);
493
494        // set metadata
495        $this->meta['relation']['references'][$page] = page_exists($page);
496        // $data = array('relation' => array('isreferencedby' => array($ID => true)));
497        // p_set_metadata($id, $data);
498
499        // add link title to summary
500        if ($this->capture) {
501            $name = $this->_getLinkTitle($name, $default, $id);
502            $this->doc .= $name;
503        }
504    }
505
506    /**
507     * Render an external link
508     *
509     * @param string $url full URL with scheme
510     * @param string|array|null $name name for the link, array for media file
511     */
512    public function externallink($url, $name = null)
513    {
514        if (is_array($name)) {
515            $this->_firstimage($name['src']);
516            if ($name['type'] == 'internalmedia') {
517                $this->_recordMediaUsage($name['src']);
518            }
519        }
520
521        if ($this->capture) {
522            $this->doc .= $this->_getLinkTitle($name, '<' . $url . '>');
523        }
524    }
525
526    /**
527     * Render an interwiki link
528     *
529     * You may want to use $this->_resolveInterWiki() here
530     *
531     * @param string $match original link - probably not much use
532     * @param string|array $name name for the link, array for media file
533     * @param string $wikiName indentifier (shortcut) for the remote wiki
534     * @param string $wikiUri the fragment parsed from the original link
535     */
536    public function interwikilink($match, $name, $wikiName, $wikiUri)
537    {
538        if (is_array($name)) {
539            $this->_firstimage($name['src']);
540            if ($name['type'] == 'internalmedia') {
541                $this->_recordMediaUsage($name['src']);
542            }
543        }
544
545        if ($this->capture) {
546            [$wikiUri] = explode('#', $wikiUri, 2);
547            $name = $this->_getLinkTitle($name, $wikiUri);
548            $this->doc .= $name;
549        }
550    }
551
552    /**
553     * Link to windows share
554     *
555     * @param string $url the link
556     * @param string|array $name name for the link, array for media file
557     */
558    public function windowssharelink($url, $name = null)
559    {
560        if (is_array($name)) {
561            $this->_firstimage($name['src']);
562            if ($name['type'] == 'internalmedia') {
563                $this->_recordMediaUsage($name['src']);
564            }
565        }
566
567        if ($this->capture) {
568            if ($name) {
569                $this->doc .= $name;
570            } else {
571                $this->doc .= '<' . $url . '>';
572            }
573        }
574    }
575
576    /**
577     * Render a linked E-Mail Address
578     *
579     * Should honor $conf['mailguard'] setting
580     *
581     * @param string $address Email-Address
582     * @param string|array $name name for the link, array for media file
583     */
584    public function emaillink($address, $name = null)
585    {
586        if (is_array($name)) {
587            $this->_firstimage($name['src']);
588            if ($name['type'] == 'internalmedia') {
589                $this->_recordMediaUsage($name['src']);
590            }
591        }
592
593        if ($this->capture) {
594            if ($name) {
595                $this->doc .= $name;
596            } else {
597                $this->doc .= '<' . $address . '>';
598            }
599        }
600    }
601
602    /**
603     * Render an internal media file
604     *
605     * @param string $src media ID
606     * @param string $title descriptive text
607     * @param string $align left|center|right
608     * @param int $width width of media in pixel
609     * @param int $height height of media in pixel
610     * @param string $cache cache|recache|nocache
611     * @param string $linking linkonly|detail|nolink
612     */
613    public function internalmedia(
614        $src,
615        $title = null,
616        $align = null,
617        $width = null,
618        $height = null,
619        $cache = null,
620        $linking = null
621    ) {
622        if ($this->capture && $title) {
623            $this->doc .= '[' . $title . ']';
624        }
625        $this->_firstimage($src);
626        $this->_recordMediaUsage($src);
627    }
628
629    /**
630     * Render an external media file
631     *
632     * @param string $src full media URL
633     * @param string $title descriptive text
634     * @param string $align left|center|right
635     * @param int $width width of media in pixel
636     * @param int $height height of media in pixel
637     * @param string $cache cache|recache|nocache
638     * @param string $linking linkonly|detail|nolink
639     */
640    public function externalmedia(
641        $src,
642        $title = null,
643        $align = null,
644        $width = null,
645        $height = null,
646        $cache = null,
647        $linking = null
648    ) {
649        if ($this->capture && $title) {
650            $this->doc .= '[' . $title . ']';
651        }
652        $this->_firstimage($src);
653    }
654
655    /**
656     * Render the output of an RSS feed
657     *
658     * @param string $url URL of the feed
659     * @param array $params Finetuning of the output
660     */
661    public function rss($url, $params)
662    {
663        $this->meta['relation']['haspart'][$url] = true;
664
665        $this->meta['date']['valid']['age'] =
666            isset($this->meta['date']['valid']['age']) ?
667                min($this->meta['date']['valid']['age'], $params['refresh']) :
668                $params['refresh'];
669    }
670
671    #region Utils
672
673    /**
674     * Removes any Namespace from the given name but keeps
675     * casing and special chars
676     *
677     * @param string $name
678     *
679     * @return mixed|string
680     * @author Andreas Gohr <andi@splitbrain.org>
681     *
682     */
683    public function _simpleTitle($name)
684    {
685        global $conf;
686
687        if (is_array($name)) {
688            return '';
689        }
690
691        if ($conf['useslash']) {
692            $nssep = '[:;/]';
693        } else {
694            $nssep = '[:;]';
695        }
696        $name = preg_replace('!.*' . $nssep . '!', '', $name);
697        //if there is a hash we use the anchor name only
698        $name = preg_replace('!.*#!', '', $name);
699        return $name;
700    }
701
702    /**
703     * Construct a title and handle images in titles
704     *
705     * @param string|array|null $title either string title or media array
706     * @param string $default default title if nothing else is found
707     * @param null|string $id linked page id (used to extract title from first heading)
708     * @return string title text
709     * @author Harry Fuecks <hfuecks@gmail.com>
710     */
711    public function _getLinkTitle($title, $default, $id = null)
712    {
713        if (is_array($title)) {
714            if ($title['title']) {
715                return '[' . $title['title'] . ']';
716            } else {
717                return $default;
718            }
719        } elseif (is_null($title) || trim($title) == '') {
720            if (useHeading('content') && $id) {
721                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
722                if ($heading) {
723                    return $heading;
724                }
725            }
726            return $default;
727        } else {
728            return $title;
729        }
730    }
731
732    /**
733     * Remember first image
734     *
735     * @param string $src image URL or ID
736     */
737    protected function _firstimage($src)
738    {
739        global $ID;
740
741        if ($this->firstimage) {
742            return;
743        }
744
745        [$src] = explode('#', $src, 2);
746        if (!media_isexternal($src)) {
747            $src = (new MediaResolver($ID))->resolveId($src);
748        }
749        if (preg_match('/.(jpe?g|gif|png|webp|svg)$/i', $src)) {
750            $this->firstimage = $src;
751        }
752    }
753
754    /**
755     * Store list of used media files in metadata
756     *
757     * @param string $src media ID
758     */
759    protected function _recordMediaUsage($src)
760    {
761        global $ID;
762
763        [$src] = explode('#', $src, 2);
764        if (media_isexternal($src)) {
765            return;
766        }
767        $src = (new MediaResolver($ID))->resolveId($src);
768        $file = mediaFN($src);
769        $this->meta['relation']['media'][$src] = file_exists($file);
770    }
771
772    #endregion
773}
774
775//Setup VIM: ex: et ts=4 :
776