xref: /dokuwiki/inc/parser/metadata.php (revision c6639e6a6a4b11d65ecbc19f1bbbf2d9b32d0c19)
1<?php
2/**
3 * The MetaData Renderer
4 *
5 * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
6 * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
7 * $persistent.
8 *
9 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
10 *
11 * @author Esther Brunner <wikidesign@gmail.com>
12 */
class Doku_Renderer_metadata extends Doku_Renderer {
    /** the approximate byte length to capture for the abstract */
    const ABSTRACT_LEN = 250;

    /** the maximum UTF8 character length for the abstract */
    const ABSTRACT_MAX = 500;

    /** @var array transient meta data, will be reset on each rendering */
    public $meta = array();

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = array();

    /** @var array the list of headers used to create unique link ids */
    protected $headers = array();

    /** @var string temporary $doc store (holds the abstract text while a footnote is rendered) */
    protected $store = '';

    /** @var string keeps the first image reference */
    protected $firstimage = '';

    /** @var bool determines if enough data for the abstract was collected, yet */
    public $capture = true;

    /** @var bool false while inside a footnote; cdata() adds nothing to the abstract then */
    protected $capturing = true;

    /** @var int number of bytes captured for abstract */
    protected $captured = 0;

    /**
     * Returns the format produced by this renderer.
     *
     * @return string always 'metadata'
     */
    public function getFormat() {
        return 'metadata';
    }

    /**
     * Initialize the document
     *
     * Sets up some of the persistent info about the page if it doesn't exist, yet,
     * and then seeds the transient metadata from the persistent values.
     */
    public function document_start() {
        global $ID;

        $this->headers = array();

        // external pages are missing create date
        // (empty() instead of a plain read: the key may not exist at all)
        if(empty($this->persistent['date']['created'])) {
            $this->persistent['date']['created'] = filectime(wikiFN($ID));
        }
        if(!isset($this->persistent['user'])) {
            $this->persistent['user'] = '';
        }
        if(!isset($this->persistent['creator'])) {
            $this->persistent['creator'] = '';
        }
        // reset metadata to persistent values
        $this->meta = $this->persistent;
    }

    /**
     * Finalize the document
     *
     * Stores collected data in the metadata: the renderer info, the abstract
     * (unless one was already set), the first image and the modification date
     * (unless one was already set).
     */
    public function document_end() {
        global $ID;

        // store internal info in metadata (notoc,nocache)
        $this->meta['internal'] = $this->info;

        if(!isset($this->meta['description']['abstract'])) {
            // cut off too long abstracts
            $this->doc = trim($this->doc);
            // ABSTRACT_MAX is a character count, so measure in characters as well;
            // a byte count would add the ellipsis to multibyte text that is not cut
            if(utf8_strlen($this->doc) > self::ABSTRACT_MAX) {
                $this->doc = utf8_substr($this->doc, 0, self::ABSTRACT_MAX).'…';
            }
            $this->meta['description']['abstract'] = $this->doc;
        }

        $this->meta['relation']['firstimage'] = $this->firstimage;

        if(!isset($this->meta['date']['modified'])) {
            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
        }

    }

    /**
     * Render plain text data
     *
     * This function takes care of the amount captured data and will stop capturing when
     * enough abstract data is available. Text inside footnotes is ignored.
     *
     * @param string $text
     */
    public function cdata($text) {
        if(!$this->capture || !$this->capturing) return;

        $this->doc .= $text;

        $this->captured += strlen($text);
        if($this->captured > self::ABSTRACT_LEN) $this->capture = false;
    }

    /**
     * Add an item to the TOC
     *
     * @param string $id       the hash link
     * @param string $text     the text to display
     * @param int    $level    the nesting level
     */
    public function toc_additem($id, $text, $level) {
        global $conf;

        //only add items within configured levels
        if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
            // the TOC is one of our standard ul list arrays ;-)
            $this->meta['description']['tableofcontents'][] = array(
                'hid'   => $id,
                'title' => $text,
                'type'  => 'ul',
                'level' => $level - $conf['toptoclevel'] + 1
            );
        }

    }

    /**
     * Render a heading
     *
     * The first heading seen becomes the page title unless a title is already set.
     *
     * @param string $text  the text to display
     * @param int    $level header level
     * @param int    $pos   byte position in the original source
     */
    public function header($text, $level, $pos) {
        if(!isset($this->meta['title'])) $this->meta['title'] = $text;

        // add the header to the TOC
        $hid = $this->_headerToLink($text, true);
        $this->toc_additem($hid, $text, $level);

        // add to summary
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Open a paragraph
     */
    public function p_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Close a paragraph
     */
    public function p_close() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a line break
     */
    public function linebreak() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a horizontal line
     */
    public function hr() {
        $this->cdata(DOKU_LF.'----------'.DOKU_LF);
    }

    /**
     * Callback for footnote start syntax
     *
     * Footnote content must not end up in the abstract. The content rendered
     * so far is moved to $store, $doc is cleared and text capturing is
     * suspended until the footnote is closed.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function footnote_open() {
        // the $capturing check keeps a second open from overwriting $store
        if($this->capture && $this->capturing) {
            // move current content to store
            $this->store     = $this->doc;
            $this->doc       = '';
            $this->capturing = false;
        }
    }

    /**
     * Callback for footnote end syntax
     *
     * Whatever was written to $doc inside the footnote is dropped and the
     * old content is restored from $store again.
     *
     * @author Andreas Gohr
     */
    public function footnote_close() {
        // keyed on $capturing (not $capture) so the stored abstract is restored
        // even if $capture was switched off while inside the footnote
        if(!$this->capturing) {
            $this->capturing = true;
            // restore old content
            $this->doc   = $this->store;
            $this->store = '';
        }
    }

    /**
     * Open an unordered list
     */
    public function listu_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open an ordered list
     */
    public function listo_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open a list item
     *
     * @param int $level the nesting level
     * @param bool $node true when a node; false when a leaf
     */
    public function listitem_open($level,$node=false) {
        $this->cdata(str_repeat(DOKU_TAB, $level).'* ');
    }

    /**
     * Close a list item
     */
    public function listitem_close() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Output preformatted text
     *
     * @param string $text
     */
    public function preformatted($text) {
        $this->cdata($text);
    }

    /**
     * Start a block quote
     */
    public function quote_open() {
        $this->cdata(DOKU_LF.DOKU_TAB.'"');
    }

    /**
     * Stop a block quote
     */
    public function quote_close() {
        $this->cdata('"'.DOKU_LF);
    }

    /**
     * Display text as file content, optionally syntax highlighted
     *
     * Only the text is added to the abstract; $lang and $file are not used here.
     *
     * @param string $text text to show
     * @param string $lang programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function file($text, $lang = null, $file = null) {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Display text as code content, optionally syntax highlighted
     *
     * Only the text is added to the abstract; $language and $file are not used here.
     *
     * @param string $text     text to show
     * @param string $language programming language to use for syntax highlighting
     * @param string $file     file path label
     */
    public function code($text, $language = null, $file = null) {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Format an acronym
     *
     * The acronym is added to the abstract as-is.
     *
     * @param string $acronym
     */
    public function acronym($acronym) {
        $this->cdata($acronym);
    }

    /**
     * Format a smiley
     *
     * The smiley text is added to the abstract as-is.
     *
     * @param string $smiley
     */
    public function smiley($smiley) {
        $this->cdata($smiley);
    }

    /**
     * Format an entity
     *
     * Entities are basically small text replacements; here the
     * entity text is added to the abstract as-is.
     *
     * @param string $entity
     */
    public function entity($entity) {
        $this->cdata($entity);
    }

    /**
     * Typographically format a multiply sign
     *
     * Example: ($x=640, $y=480) should result in "640×480"
     *
     * @param string|int $x first value
     * @param string|int $y second value
     */
    public function multiplyentity($x, $y) {
        $this->cdata($x.'×'.$y);
    }

    /**
     * Render an opening single quote char (language specific)
     */
    public function singlequoteopening() {
        global $lang;
        $this->cdata($lang['singlequoteopening']);
    }

    /**
     * Render a closing single quote char (language specific)
     */
    public function singlequoteclosing() {
        global $lang;
        $this->cdata($lang['singlequoteclosing']);
    }

    /**
     * Render an apostrophe char (language specific)
     */
    public function apostrophe() {
        global $lang;
        $this->cdata($lang['apostrophe']);
    }

    /**
     * Render an opening double quote char (language specific)
     */
    public function doublequoteopening() {
        global $lang;
        $this->cdata($lang['doublequoteopening']);
    }

    /**
     * Render a closing double quote char (language specific)
     */
    public function doublequoteclosing() {
        global $lang;
        $this->cdata($lang['doublequoteclosing']);
    }

    /**
     * Render a CamelCase link
     *
     * @param string $link The link name
     * @see http://en.wikipedia.org/wiki/CamelCase
     */
    public function camelcaselink($link) {
        $this->internallink($link, $link);
    }

    /**
     * Render a page local link
     *
     * Nothing is added to the abstract; only a media file used as link
     * name is recorded.
     *
     * @param string            $hash hash link identifier
     * @param string|array|null $name name for the link, array for media file
     */
    public function locallink($hash, $name = null) {
        $this->_recordLinkMedia($name);
    }

    /**
     * keep track of internal links in $this->meta['relation']['references']
     *
     * @param string            $id   page ID to link to. eg. 'wiki:syntax'
     * @param string|array|null $name name for the link, array for media file
     */
    public function internallink($id, $name = null) {
        global $ID;

        $this->_recordLinkMedia($name);

        // drop a query string, if any
        list($id) = explode('?', $id, 2);

        $default = $this->_simpleTitle($id);

        // first resolve and clean up the $id
        resolve_pageid(getNS($ID), $id, $exists);
        list($page) = explode('#', $id, 2);

        // set metadata
        $this->meta['relation']['references'][$page] = $exists;

        // add link title to summary
        if($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, $default, $id);
        }
    }

    /**
     * Render an external link
     *
     * @param string            $url  full URL with scheme
     * @param string|array|null $name name for the link, array for media file
     */
    public function externallink($url, $name = null) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render an interwiki link
     *
     * @param string       $match     original link - probably not much use
     * @param string|array $name      name for the link, array for media file
     * @param string       $wikiName  identifier (shortcut) for the remote wiki
     * @param string       $wikiUri   the fragment parsed from the original link
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            // the part after a '#' is not used for the default title
            list($wikiUri) = explode('#', $wikiUri, 2);
            $this->doc .= $this->_getLinkTitle($name, $wikiUri);
        }
    }

    /**
     * Link to windows share
     *
     * @param string            $url  the link
     * @param string|array|null $name name for the link, array for media file
     */
    public function windowssharelink($url, $name = null) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            // _getLinkTitle() copes with media arrays, which must not be
            // concatenated to the abstract directly
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render a linked E-Mail Address
     *
     * @param string            $address Email-Address
     * @param string|array|null $name    name for the link, array for media file
     */
    public function emaillink($address, $name = null) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            // _getLinkTitle() copes with media arrays, which must not be
            // concatenated to the abstract directly
            $this->doc .= $this->_getLinkTitle($name, '<'.$address.'>');
        }
    }

    /**
     * Render an internal media file
     *
     * Adds the title (if any) to the abstract and records the media usage.
     *
     * @param string $src     media ID
     * @param string $title   descriptive text
     * @param string $align   left|center|right
     * @param int    $width   width of media in pixel
     * @param int    $height  height of media in pixel
     * @param string $cache   cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function internalmedia($src, $title = null, $align = null, $width = null,
                           $height = null, $cache = null, $linking = null) {
        if($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
        $this->_recordMediaUsage($src);
    }

    /**
     * Render an external media file
     *
     * Adds the title (if any) to the abstract; external media is not
     * recorded in the list of used media files.
     *
     * @param string $src     full media URL
     * @param string $title   descriptive text
     * @param string $align   left|center|right
     * @param int    $width   width of media in pixel
     * @param int    $height  height of media in pixel
     * @param string $cache   cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function externalmedia($src, $title = null, $align = null, $width = null,
                           $height = null, $cache = null, $linking = null) {
        if($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
    }

    /**
     * Render the output of an RSS feed
     *
     * Records the feed as part of the page and keeps the smallest refresh
     * value seen so far in $this->meta['date']['valid']['age'].
     *
     * @param string $url    URL of the feed
     * @param array  $params Finetuning of the output
     */
    public function rss($url, $params) {
        $this->meta['relation']['haspart'][$url] = true;

        $this->meta['date']['valid']['age'] =
            isset($this->meta['date']['valid']['age']) ?
                min($this->meta['date']['valid']['age'], $params['refresh']) :
                $params['refresh'];
    }

    #region Utils

    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $name
     *
     * @return mixed|string empty string when $name is an array
     */
    public function _simpleTitle($name) {
        global $conf;

        if(is_array($name)) return '';

        if($conf['useslash']) {
            $nssep = '[:;/]';
        } else {
            $nssep = '[:;]';
        }
        $name = preg_replace('!.*'.$nssep.'!', '', $name);
        //if there is a hash we use the anchor name only
        $name = preg_replace('!.*#!', '', $name);
        return $name;
    }

    /**
     * Construct a title and handle images in titles
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param string|array|null $title    either string title or media array
     * @param string            $default  default title if nothing else is found
     * @param null|string       $id       linked page id (used to extract title from first heading)
     * @return string title text
     */
    public function _getLinkTitle($title, $default, $id = null) {
        if(is_array($title)) {
            // media array: use its title in brackets, if it has one
            if(!empty($title['title'])) {
                return '['.$title['title'].']';
            }
            return $default;
        }

        if(is_null($title) || trim($title) == '') {
            if(useHeading('content') && $id) {
                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
                if($heading) return $heading;
            }
            return $default;
        }

        return $title;
    }

    /**
     * Record the media file that is used as the name of a link
     *
     * Does nothing unless $name is a media array. The image may become the
     * page's first image and internal media is added to the used media list.
     *
     * @param string|array|null $name name for the link, array for media file
     */
    protected function _recordLinkMedia($name) {
        if(!is_array($name)) return;

        $this->_firstimage($name['src']);
        if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
    }

    /**
     * Remember first image
     *
     * Only the first source ending in .jpg, .jpeg, .gif or .png is kept.
     *
     * @param string $src image URL or ID
     */
    protected function _firstimage($src) {
        if($this->firstimage) return;
        global $ID;

        list($src) = explode('#', $src, 2);
        if(!media_isexternal($src)) {
            resolve_mediaid(getNS($ID), $src, $exists);
        }
        // the dot is escaped so that only a real file extension matches
        if(preg_match('/\.(jpe?g|gif|png)$/i', $src)) {
            $this->firstimage = $src;
        }
    }

    /**
     * Store list of used media files in metadata
     *
     * External media is skipped.
     *
     * @param string $src media ID
     */
    protected function _recordMediaUsage($src) {
        global $ID;

        list ($src) = explode('#', $src, 2);
        if(media_isexternal($src)) return;
        resolve_mediaid(getNS($ID), $src, $exists);
        $this->meta['relation']['media'][$src] = $exists;
    }

    #endregion
}
661
662//Setup VIM: ex: et ts=4 :
663