xref: /dokuwiki/inc/parser/metadata.php (revision 93075b016d22b16e7d11cd93fac9a54e91e22269)
1<?php
2/**
3 * Renderer for metadata
4 *
5 * @author Esther Brunner <wikidesign@gmail.com>
6 */
// abort immediately unless the including code has defined DOKU_INC
if(!defined('DOKU_INC')) {
    die('meh.');
}
8
// Whitespace constants (they help View > Source); each one is only
// defined here when nothing has defined it earlier.
defined('DOKU_LF') || define('DOKU_LF', "\n");
defined('DOKU_TAB') || define('DOKU_TAB', "\t");
18
19/**
20 * The MetaData Renderer
21 *
 * Metadata is additional information about a DokuWiki page. It is extracted mainly from the page's content,
 * but also from its own filesystem data (like the creation time). All metadata is stored in the fields $meta
 * and $persistent.
25 *
26 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
27 */
28class Doku_Renderer_metadata extends Doku_Renderer {
29
    /**
     * @var array transient meta data, will be reset on each rendering
     *            (document_start() overwrites it with a copy of $persistent)
     */
    public $meta = array();

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = array();

    /**
     * @var array the list of headers used to create unique link ids
     *            (cleared in document_start(), handed to sectionID() by _headerToLink())
     */
    protected $headers = array();

    /**
     * @var bool determines if enough data for a page summary was collected, yet
     *           (true while text is still appended to $doc; block-closing callbacks
     *           switch it to false once $doc is longer than 250 bytes)
     */
    protected $capture = true;

    /** @var string temporary $doc store (holds $doc while a footnote is being rendered) */
    protected $store = '';

    /** @var string keeps the first image reference (set at most once by _firstimage()) */
    protected $firstimage = '';
47
48    /**
49     * Returns the format produced by this renderer.
50     *
51     * @return string always 'metadata'
52     */
53    function getFormat() {
54        return 'metadata';
55    }
56
57    /**
58     * Initialize the document
59     *
60     * Sets up some of the persistent info about the page if it doesn't exist, yet.
61     */
62    function document_start() {
63        global $ID;
64
65        $this->headers = array();
66
67        // external pages are missing create date
68        if(!$this->persistent['date']['created']) {
69            $this->persistent['date']['created'] = filectime(wikiFN($ID));
70        }
71        if(!isset($this->persistent['user'])) {
72            $this->persistent['user'] = '';
73        }
74        if(!isset($this->persistent['creator'])) {
75            $this->persistent['creator'] = '';
76        }
77        // reset metadata to persistent values
78        $this->meta = $this->persistent;
79    }
80
81    /**
82     * Finalize the document
83     *
84     * Stores collected data in the metadata
85     */
86    function document_end() {
87        global $ID;
88
89        // store internal info in metadata (notoc,nocache)
90        $this->meta['internal'] = $this->info;
91
92        if(!isset($this->meta['description']['abstract'])) {
93            // cut off too long abstracts
94            $this->doc = trim($this->doc);
95            if(strlen($this->doc) > 500)
96                $this->doc = utf8_substr($this->doc, 0, 500).'…';
97            $this->meta['description']['abstract'] = $this->doc;
98        }
99
100        $this->meta['relation']['firstimage'] = $this->firstimage;
101
102        if(!isset($this->meta['date']['modified'])) {
103            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
104        }
105
106    }
107
108    /**
109     * Add an item to the TOC
110     *
111     * @param string $id       the hash link
112     * @param string $text     the text to display
113     * @param int    $level    the nesting level
114     */
115    function toc_additem($id, $text, $level) {
116        global $conf;
117
118        //only add items within configured levels
119        if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
120            // the TOC is one of our standard ul list arrays ;-)
121            $this->meta['description']['tableofcontents'][] = array(
122                'hid'   => $id,
123                'title' => $text,
124                'type'  => 'ul',
125                'level' => $level - $conf['toptoclevel'] + 1
126            );
127        }
128
129    }
130
131    /**
132     * Render a heading
133     *
134     * @param string $text  the text to display
135     * @param int    $level header level
136     * @param int    $pos   byte position in the original source
137     */
138    function header($text, $level, $pos) {
139        if(!isset($this->meta['title'])) $this->meta['title'] = $text;
140
141        // add the header to the TOC
142        $hid = $this->_headerToLink($text, 'true');
143        $this->toc_additem($hid, $text, $level);
144
145        // add to summary
146        if($this->capture && ($level > 1)) $this->doc .= DOKU_LF.$text.DOKU_LF;
147    }
148
149    /**
150     * Render plain text data
151     *
152     * @param $text
153     */
154    function cdata($text) {
155        if($this->capture) $this->doc .= $text;
156    }
157
158    /**
159     * Open a paragraph
160     */
161    function p_open() {
162        if($this->capture) $this->doc .= DOKU_LF;
163    }
164
165    /**
166     * Close a paragraph
167     */
168    function p_close() {
169        if($this->capture) {
170            if(strlen($this->doc) > 250) $this->capture = false;
171            else $this->doc .= DOKU_LF;
172        }
173    }
174
175    /**
176     * Create a line break
177     */
178    function linebreak() {
179        if($this->capture) $this->doc .= DOKU_LF;
180    }
181
182    /**
183     * Create a horizontal line
184     */
185    function hr() {
186        if($this->capture) {
187            if(strlen($this->doc) > 250) $this->capture = false;
188            else $this->doc .= DOKU_LF.'----------'.DOKU_LF;
189        }
190    }
191
192    /**
193     * Callback for footnote start syntax
194     *
195     * All following content will go to the footnote instead of
196     * the document. To achieve this the previous rendered content
197     * is moved to $store and $doc is cleared
198     *
199     * @author Andreas Gohr <andi@splitbrain.org>
200     */
201    function footnote_open() {
202        if($this->capture) {
203            // move current content to store and record footnote
204            $this->store = $this->doc;
205            $this->doc   = '';
206        }
207    }
208
209    /**
210     * Callback for footnote end syntax
211     *
212     * All rendered content is moved to the $footnotes array and the old
213     * content is restored from $store again
214     *
215     * @author Andreas Gohr
216     */
217    function footnote_close() {
218        if($this->capture) {
219            // restore old content
220            $this->doc   = $this->store;
221            $this->store = '';
222        }
223    }
224
225    /**
226     * Open an unordered list
227     */
228    function listu_open() {
229        if($this->capture) $this->doc .= DOKU_LF;
230    }
231
232    /**
233     * Close an unordered list
234     */
235    function listu_close() {
236        if($this->capture && (strlen($this->doc) > 250)) $this->capture = false;
237    }
238
239    /**
240     * Open an ordered list
241     */
242    function listo_open() {
243        if($this->capture) $this->doc .= DOKU_LF;
244    }
245
246    /**
247     * Close an ordered list
248     */
249    function listo_close() {
250        if($this->capture && (strlen($this->doc) > 250)) $this->capture = false;
251    }
252
253    /**
254     * Open a list item
255     *
256     * @param int $level the nesting level
257     */
258    function listitem_open($level) {
259        if($this->capture) $this->doc .= str_repeat(DOKU_TAB, $level).'* ';
260    }
261
262    /**
263     * Close a list item
264     */
265    function listitem_close() {
266        if($this->capture) $this->doc .= DOKU_LF;
267    }
268
269    /**
270     * Output preformatted text
271     *
272     * @param string $text
273     */
274    function preformatted($text) {
275        if($this->capture) $this->doc .= $text;
276    }
277
278    /**
279     * Start a block quote
280     */
281    function quote_open() {
282        if($this->capture) $this->doc .= DOKU_LF.DOKU_TAB.'"';
283    }
284
285    /**
286     * Stop a block quote
287     */
288    function quote_close() {
289        if($this->capture) {
290            $this->doc .= '"';
291            if(strlen($this->doc) > 250) $this->capture = false;
292            else $this->doc .= DOKU_LF;
293        }
294    }
295
296    /**
297     * Display text as file content, optionally syntax highlighted
298     *
299     * @param string $text text to show
300     * @param string $lang programming language to use for syntax highlighting
301     * @param string $file file path label
302     */
303    function file($text, $lang = null, $file = null) {
304        if($this->capture) {
305            $this->doc .= DOKU_LF.$text;
306            if(strlen($this->doc) > 250) $this->capture = false;
307            else $this->doc .= DOKU_LF;
308        }
309    }
310
311    /**
312     * Display text as code content, optionally syntax highlighted
313     *
314     * @param string $text     text to show
315     * @param string $language programming language to use for syntax highlighting
316     * @param string $file     file path label
317     */
318    function code($text, $language = null, $file = null) {
319        if($this->capture) {
320            $this->doc .= DOKU_LF.$text;
321            if(strlen($this->doc) > 250) $this->capture = false;
322            else $this->doc .= DOKU_LF;
323        }
324    }
325
326    /**
327     * Format an acronym
328     *
329     * Uses $this->acronyms
330     *
331     * @param string $acronym
332     */
333    function acronym($acronym) {
334        if($this->capture) $this->doc .= $acronym;
335    }
336
337    /**
338     * Format a smiley
339     *
340     * Uses $this->smiley
341     *
342     * @param string $smiley
343     */
344    function smiley($smiley) {
345        if($this->capture) $this->doc .= $smiley;
346    }
347
348    /**
349     * Format an entity
350     *
351     * Entities are basically small text replacements
352     *
353     * Uses $this->entities
354     *
355     * @param string $entity
356     */
357    function entity($entity) {
358        if($this->capture) $this->doc .= $entity;
359    }
360
361    /**
362     * Typographically format a multiply sign
363     *
364     * Example: ($x=640, $y=480) should result in "640×480"
365     *
366     * @param string|int $x first value
367     * @param string|int $y second value
368     */
369    function multiplyentity($x, $y) {
370        if($this->capture) $this->doc .= $x.'×'.$y;
371    }
372
373    /**
374     * Render an opening single quote char (language specific)
375     */
376    function singlequoteopening() {
377        global $lang;
378        if($this->capture) $this->doc .= $lang['singlequoteopening'];
379    }
380
381    /**
382     * Render a closing single quote char (language specific)
383     */
384    function singlequoteclosing() {
385        global $lang;
386        if($this->capture) $this->doc .= $lang['singlequoteclosing'];
387    }
388
389    /**
390     * Render an apostrophe char (language specific)
391     */
392    function apostrophe() {
393        global $lang;
394        if($this->capture) $this->doc .= $lang['apostrophe'];
395    }
396
397    /**
398     * Render an opening double quote char (language specific)
399     */
400    function doublequoteopening() {
401        global $lang;
402        if($this->capture) $this->doc .= $lang['doublequoteopening'];
403    }
404
405    /**
406     * Render an closinging double quote char (language specific)
407     */
408    function doublequoteclosing() {
409        global $lang;
410        if($this->capture) $this->doc .= $lang['doublequoteclosing'];
411    }
412
413    /**
414     * Render a CamelCase link
415     *
416     * @param string $link The link name
417     * @see http://en.wikipedia.org/wiki/CamelCase
418     */
419    function camelcaselink($link) {
420        $this->internallink($link, $link);
421    }
422
423    /**
424     * Render a page local link
425     *
426     * @param string $hash hash link identifier
427     * @param string $name name for the link
428     */
429    function locallink($hash, $name = null) {
430        if(is_array($name)) {
431            $this->_firstimage($name['src']);
432            if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
433        }
434    }
435
436    /**
437     * keep track of internal links in $this->meta['relation']['references']
438     *
439     * @param string       $id   page ID to link to. eg. 'wiki:syntax'
440     * @param string|array $name name for the link, array for media file
441     */
442    function internallink($id, $name = null) {
443        global $ID;
444
445        if(is_array($name)) {
446            $this->_firstimage($name['src']);
447            if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
448        }
449
450        $parts = explode('?', $id, 2);
451        if(count($parts) === 2) {
452            $id = $parts[0];
453        }
454
455        $default = $this->_simpleTitle($id);
456
457        // first resolve and clean up the $id
458        resolve_pageid(getNS($ID), $id, $exists);
459        @list($page) = explode('#', $id, 2);
460
461        // set metadata
462        $this->meta['relation']['references'][$page] = $exists;
463        // $data = array('relation' => array('isreferencedby' => array($ID => true)));
464        // p_set_metadata($id, $data);
465
466        // add link title to summary
467        if($this->capture) {
468            $name = $this->_getLinkTitle($name, $default, $id);
469            $this->doc .= $name;
470        }
471    }
472
473    /**
474     * Render an external link
475     *
476     * @param string       $url  full URL with scheme
477     * @param string|array $name name for the link, array for media file
478     */
479    function externallink($url, $name = null) {
480        if(is_array($name)) {
481            $this->_firstimage($name['src']);
482            if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
483        }
484
485        if($this->capture) {
486            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
487        }
488    }
489
490    /**
491     * Render an interwiki link
492     *
493     * You may want to use $this->_resolveInterWiki() here
494     *
495     * @param string       $match     original link - probably not much use
496     * @param string|array $name      name for the link, array for media file
497     * @param string       $wikiName  indentifier (shortcut) for the remote wiki
498     * @param string       $wikiUri   the fragment parsed from the original link
499     */
500    function interwikilink($match, $name = null, $wikiName, $wikiUri) {
501        if(is_array($name)) {
502            $this->_firstimage($name['src']);
503            if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
504        }
505
506        if($this->capture) {
507            list($wikiUri) = explode('#', $wikiUri, 2);
508            $name = $this->_getLinkTitle($name, $wikiUri);
509            $this->doc .= $name;
510        }
511    }
512
513    /**
514     * Link to windows share
515     *
516     * @param string       $url  the link
517     * @param string|array $name name for the link, array for media file
518     */
519    function windowssharelink($url, $name = null) {
520        if(is_array($name)) {
521            $this->_firstimage($name['src']);
522            if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
523        }
524
525        if($this->capture) {
526            if($name) $this->doc .= $name;
527            else $this->doc .= '<'.$url.'>';
528        }
529    }
530
531    /**
532     * Render a linked E-Mail Address
533     *
534     * Should honor $conf['mailguard'] setting
535     *
536     * @param string       $address Email-Address
537     * @param string|array $name    name for the link, array for media file
538     */
539    function emaillink($address, $name = null) {
540        if(is_array($name)) {
541            $this->_firstimage($name['src']);
542            if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
543        }
544
545        if($this->capture) {
546            if($name) $this->doc .= $name;
547            else $this->doc .= '<'.$address.'>';
548        }
549    }
550
551    /**
552     * Render an internal media file
553     *
554     * @param string $src     media ID
555     * @param string $title   descriptive text
556     * @param string $align   left|center|right
557     * @param int    $width   width of media in pixel
558     * @param int    $height  height of media in pixel
559     * @param string $cache   cache|recache|nocache
560     * @param string $linking linkonly|detail|nolink
561     */
562    function internalmedia($src, $title = null, $align = null, $width = null,
563                           $height = null, $cache = null, $linking = null) {
564        if($this->capture && $title) $this->doc .= '['.$title.']';
565        $this->_firstimage($src);
566        $this->_recordMediaUsage($src);
567    }
568
569    /**
570     * Render an external media file
571     *
572     * @param string $src     full media URL
573     * @param string $title   descriptive text
574     * @param string $align   left|center|right
575     * @param int    $width   width of media in pixel
576     * @param int    $height  height of media in pixel
577     * @param string $cache   cache|recache|nocache
578     * @param string $linking linkonly|detail|nolink
579     */
580    function externalmedia($src, $title = null, $align = null, $width = null,
581                           $height = null, $cache = null, $linking = null) {
582        if($this->capture && $title) $this->doc .= '['.$title.']';
583        $this->_firstimage($src);
584    }
585
586    /**
587     * Render the output of an RSS feed
588     *
589     * @param string $url    URL of the feed
590     * @param array  $params Finetuning of the output
591     */
592    function rss($url, $params) {
593        $this->meta['relation']['haspart'][$url] = true;
594
595        $this->meta['date']['valid']['age'] =
596            isset($this->meta['date']['valid']['age']) ?
597                min($this->meta['date']['valid']['age'], $params['refresh']) :
598                $params['refresh'];
599    }
600
601    #region Utils
602
603    /**
604     * Removes any Namespace from the given name but keeps
605     * casing and special chars
606     *
607     * @author Andreas Gohr <andi@splitbrain.org>
608     */
609    function _simpleTitle($name) {
610        global $conf;
611
612        if(is_array($name)) return '';
613
614        if($conf['useslash']) {
615            $nssep = '[:;/]';
616        } else {
617            $nssep = '[:;]';
618        }
619        $name = preg_replace('!.*'.$nssep.'!', '', $name);
620        //if there is a hash we use the anchor name only
621        $name = preg_replace('!.*#!', '', $name);
622        return $name;
623    }
624
625    /**
626     * Creates a linkid from a headline
627     *
628     * @author Andreas Gohr <andi@splitbrain.org>
629     * @param string  $title   The headline title
630     * @param boolean $create  Create a new unique ID?
631     * @return string
632     */
633    function _headerToLink($title, $create = false) {
634        if($create) {
635            return sectionID($title, $this->headers);
636        } else {
637            $check = false;
638            return sectionID($title, $check);
639        }
640    }
641
642    /**
643     * Construct a title and handle images in titles
644     *
645     * @author Harry Fuecks <hfuecks@gmail.com>
646     * @param string|array $title    either string title or media array
647     * @param string       $default  default title if nothing else is found
648     * @param null|string  $id       linked page id (used to extract title from first heading)
649     * @return string title text
650     */
651    function _getLinkTitle($title, $default, $id = null) {
652        if(is_array($title)) {
653            if($title['title']) {
654                return '['.$title['title'].']';
655            } else {
656                return $default;
657            }
658        } else if(is_null($title) || trim($title) == '') {
659            if(useHeading('content') && $id) {
660                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
661                if($heading) return $heading;
662            }
663            return $default;
664        } else {
665            return $title;
666        }
667    }
668
669    /**
670     * Remember first image
671     *
672     * @param string $src image URL or ID
673     */
674    function _firstimage($src) {
675        if($this->firstimage) return;
676        global $ID;
677
678        list($src) = explode('#', $src, 2);
679        if(!media_isexternal($src)) {
680            resolve_mediaid(getNS($ID), $src, $exists);
681        }
682        if(preg_match('/.(jpe?g|gif|png)$/i', $src)) {
683            $this->firstimage = $src;
684        }
685    }
686
687    /**
688     * Store list of used media files in metadata
689     *
690     * @param string $src media ID
691     */
692    function _recordMediaUsage($src) {
693        global $ID;
694
695        list ($src) = explode('#', $src, 2);
696        if(media_isexternal($src)) return;
697        resolve_mediaid(getNS($ID), $src, $exists);
698        $this->meta['relation']['media'][$src] = $exists;
699    }
700
701    #endregion
702}
703
704//Setup VIM: ex: et ts=4 :
705