xref: /dokuwiki/inc/parser/metadata.php (revision c0f9e7c3a6878cbe728d03960a6ea6b1312a64ec)
1<?php
2/**
3 * Renderer for metadata
4 *
5 * @author Esther Brunner <wikidesign@gmail.com>
6 */
// Abort immediately when this file is loaded outside of a DokuWiki bootstrap
defined('DOKU_INC') or die('meh.');

// Line feed: some whitespace to help View > Source (only defined if still missing)
defined('DOKU_LF') or define('DOKU_LF', "\n");

// Tab: some whitespace to help View > Source (only defined if still missing)
defined('DOKU_TAB') or define('DOKU_TAB', "\t");
18
/**
 * The MetaData Renderer
 *
 * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
 * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
 * $persistent.
 *
 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
 */
class Doku_Renderer_metadata extends Doku_Renderer {
    /** the approximate byte length to capture for the abstract */
    const ABSTRACT_LEN = 250;

    /** the maximum UTF8 character length for the abstract */
    const ABSTRACT_MAX = 500;

    /** @var array transient meta data, will be reset on each rendering */
    public $meta = array();

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = array();

    /** @var array the list of headers used to create unique link ids */
    protected $headers = array();

    /** @var string temporary $doc store */
    protected $store = '';

    /** @var string keeps the first image reference */
    protected $firstimage = '';

    /** @var bool determines if enough data for the abstract was collected, yet */
    public $capture = true;

    /** @var int number of bytes captured for abstract */
    protected $captured = 0;

    /** @var bool true between footnote_open() and footnote_close() when $doc was moved to $store */
    protected $inFootnote = false;

    /**
     * Returns the format produced by this renderer.
     *
     * @return string always 'metadata'
     */
    public function getFormat() {
        return 'metadata';
    }

    /**
     * Initialize the document
     *
     * Sets up some of the persistent info about the page if it doesn't exist, yet,
     * then seeds the transient metadata from the persistent values.
     */
    public function document_start() {
        global $ID;

        $this->headers = array();

        // external pages are missing create date
        // empty() instead of a bare read: the key may not exist at all for a fresh page
        if(empty($this->persistent['date']['created'])) {
            $this->persistent['date']['created'] = filectime(wikiFN($ID));
        }
        if(!isset($this->persistent['user'])) {
            $this->persistent['user'] = '';
        }
        if(!isset($this->persistent['creator'])) {
            $this->persistent['creator'] = '';
        }
        // reset metadata to persistent values
        $this->meta = $this->persistent;
    }

    /**
     * Finalize the document
     *
     * Stores collected data in the metadata: the renderer info, the abstract
     * (unless one was set already), the first image and the modification date
     * (unless one was set already).
     */
    public function document_end() {
        global $ID;

        // store internal info in metadata (notoc,nocache)
        $this->meta['internal'] = $this->info;

        if(!isset($this->meta['description']['abstract'])) {
            $this->doc = trim($this->doc);

            // cut off too long abstracts
            // ABSTRACT_MAX counts characters; the byte check is only a cheap pre-filter
            // (a text of at most ABSTRACT_MAX bytes can never exceed ABSTRACT_MAX characters)
            if(strlen($this->doc) > self::ABSTRACT_MAX) {
                $shortened = utf8_substr($this->doc, 0, self::ABSTRACT_MAX);
                // only add the ellipsis when something was really cut off; multibyte
                // text can be longer than ABSTRACT_MAX bytes yet within the character limit
                if($shortened !== $this->doc) {
                    $shortened .= '…';
                }
                $this->doc = $shortened;
            }
            $this->meta['description']['abstract'] = $this->doc;
        }

        $this->meta['relation']['firstimage'] = $this->firstimage;

        if(!isset($this->meta['date']['modified'])) {
            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
        }
    }

    /**
     * Render plain text data
     *
     * This function takes care of the amount captured data and will stop capturing when
     * enough abstract data is available
     *
     * @param string $text the text to add to the abstract
     */
    public function cdata($text) {
        if(!$this->capture) return;

        $this->doc .= $text;

        // byte based accounting; capturing stops once more than ABSTRACT_LEN bytes were seen
        $this->captured += strlen($text);
        if($this->captured > self::ABSTRACT_LEN) $this->capture = false;
    }

    /**
     * Add an item to the TOC
     *
     * @param string $id       the hash link
     * @param string $text     the text to display
     * @param int    $level    the nesting level
     */
    public function toc_additem($id, $text, $level) {
        global $conf;

        //only add items within configured levels
        if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
            // the TOC is one of our standard ul list arrays ;-)
            $this->meta['description']['tableofcontents'][] = array(
                'hid'   => $id,
                'title' => $text,
                'type'  => 'ul',
                'level' => $level - $conf['toptoclevel'] + 1
            );
        }
    }

    /**
     * Render a heading
     *
     * The first heading seen becomes the page title unless a title is set already.
     *
     * @param string $text  the text to display
     * @param int    $level header level
     * @param int    $pos   byte position in the original source (not used here)
     */
    public function header($text, $level, $pos) {
        if(!isset($this->meta['title'])) $this->meta['title'] = $text;

        // add the header to the TOC
        $hid = $this->_headerToLink($text, true);
        $this->toc_additem($hid, $text, $level);

        // add to summary
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Open a paragraph
     */
    public function p_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Close a paragraph
     */
    public function p_close() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a line break
     */
    public function linebreak() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a horizontal line
     */
    public function hr() {
        $this->cdata(DOKU_LF.'----------'.DOKU_LF);
    }

    /**
     * Callback for footnote start syntax
     *
     * All following content belongs to the footnote instead of the document.
     * The previously rendered content is moved to $store and $doc is cleared.
     * Capturing is paused until footnote_close() so that footnote text neither
     * counts toward ABSTRACT_LEN nor can switch capturing off for good, which
     * would prevent the stored content from ever being restored.
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function footnote_open() {
        if($this->capture) {
            // move current content to store
            $this->store = $this->doc;
            $this->doc   = '';

            // pause capturing and remember that there is something to restore
            $this->capture    = false;
            $this->inFootnote = true;
        }
    }

    /**
     * Callback for footnote end syntax
     *
     * Everything rendered inside the footnote is dropped and the old content
     * is restored from $store again. Capturing resumes afterwards.
     *
     * @author Andreas Gohr
     */
    public function footnote_close() {
        // keyed on the flag, not on $store: the stored content may legitimately be ''
        if($this->inFootnote) {
            // restore old content
            $this->doc   = $this->store;
            $this->store = '';

            // resume capturing
            $this->capture    = true;
            $this->inFootnote = false;
        }
    }

    /**
     * Open an unordered list
     */
    public function listu_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open an ordered list
     */
    public function listo_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open a list item
     *
     * @param int $level the nesting level
     * @param bool $node true when a node; false when a leaf
     */
    public function listitem_open($level,$node=false) {
        $this->cdata(str_repeat(DOKU_TAB, $level).'* ');
    }

    /**
     * Close a list item
     */
    public function listitem_close() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Output preformatted text
     *
     * @param string $text
     */
    public function preformatted($text) {
        $this->cdata($text);
    }

    /**
     * Start a block quote
     */
    public function quote_open() {
        $this->cdata(DOKU_LF.DOKU_TAB.'"');
    }

    /**
     * Stop a block quote
     */
    public function quote_close() {
        $this->cdata('"'.DOKU_LF);
    }

    /**
     * Display text as file content
     *
     * Only the text is added to the abstract; $lang and $file are ignored here.
     *
     * @param string $text text to show
     * @param string $lang programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function file($text, $lang = null, $file = null) {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Display text as code content
     *
     * Only the text is added to the abstract; $language and $file are ignored here.
     *
     * @param string $text     text to show
     * @param string $language programming language to use for syntax highlighting
     * @param string $file     file path label
     */
    public function code($text, $language = null, $file = null) {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Format an acronym
     *
     * The acronym is added to the abstract as is.
     *
     * @param string $acronym
     */
    public function acronym($acronym) {
        $this->cdata($acronym);
    }

    /**
     * Format a smiley
     *
     * The smiley text is added to the abstract as is.
     *
     * @param string $smiley
     */
    public function smiley($smiley) {
        $this->cdata($smiley);
    }

    /**
     * Format an entity
     *
     * Entities are basically small text replacements; here the entity text
     * is added to the abstract as is.
     *
     * @param string $entity
     */
    public function entity($entity) {
        $this->cdata($entity);
    }

    /**
     * Typographically format a multiply sign
     *
     * Example: ($x=640, $y=480) should result in "640×480"
     *
     * @param string|int $x first value
     * @param string|int $y second value
     */
    public function multiplyentity($x, $y) {
        $this->cdata($x.'×'.$y);
    }

    /**
     * Render an opening single quote char (language specific)
     */
    public function singlequoteopening() {
        global $lang;
        $this->cdata($lang['singlequoteopening']);
    }

    /**
     * Render a closing single quote char (language specific)
     */
    public function singlequoteclosing() {
        global $lang;
        $this->cdata($lang['singlequoteclosing']);
    }

    /**
     * Render an apostrophe char (language specific)
     */
    public function apostrophe() {
        global $lang;
        $this->cdata($lang['apostrophe']);
    }

    /**
     * Render an opening double quote char (language specific)
     */
    public function doublequoteopening() {
        global $lang;
        $this->cdata($lang['doublequoteopening']);
    }

    /**
     * Render a closing double quote char (language specific)
     */
    public function doublequoteclosing() {
        global $lang;
        $this->cdata($lang['doublequoteclosing']);
    }

    /**
     * Render a CamelCase link
     *
     * Handled like an internal link that uses the link itself as its name.
     *
     * @param string $link The link name
     * @see http://en.wikipedia.org/wiki/CamelCase
     */
    public function camelcaselink($link) {
        $this->internallink($link, $link);
    }

    /**
     * Render a page local link
     *
     * Nothing is added to the abstract; only media used as link name is recorded.
     *
     * @param string            $hash hash link identifier
     * @param string|array|null $name name for the link, array for media file
     */
    public function locallink($hash, $name = null) {
        $this->_recordLinkMedia($name);
    }

    /**
     * keep track of internal links in $this->meta['relation']['references']
     *
     * @param string            $id   page ID to link to. eg. 'wiki:syntax'
     * @param string|array|null $name name for the link, array for media file
     */
    public function internallink($id, $name = null) {
        global $ID;

        $this->_recordLinkMedia($name);

        // drop a query string, if any, from the link target
        list($id) = explode('?', $id, 2);

        $default = $this->_simpleTitle($id);

        // first resolve and clean up the $id
        resolve_pageid(getNS($ID), $id, $exists);
        // explode() always returns at least one element, no error suppression needed
        list($page) = explode('#', $id, 2);

        // set metadata
        $this->meta['relation']['references'][$page] = $exists;
        // $data = array('relation' => array('isreferencedby' => array($ID => true)));
        // p_set_metadata($id, $data);

        // add link title to summary
        if($this->capture) {
            $name = $this->_getLinkTitle($name, $default, $id);
            $this->doc .= $name;
        }
    }

    /**
     * Render an external link
     *
     * @param string            $url  full URL with scheme
     * @param string|array|null $name name for the link, array for media file
     */
    public function externallink($url, $name = null) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render an interwiki link
     *
     * The parameter list is kept exactly as is (optional $name before required
     * parameters) so it stays compatible with the parent class declaration.
     *
     * @param string       $match     original link - probably not much use
     * @param string|array $name      name for the link, array for media file
     * @param string       $wikiName  identifier (shortcut) for the remote wiki
     * @param string       $wikiUri   the fragment parsed from the original link
     */
    public function interwikilink($match, $name = null, $wikiName, $wikiUri) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            // the part before any '#' serves as the fallback title
            list($wikiUri) = explode('#', $wikiUri, 2);
            $name = $this->_getLinkTitle($name, $wikiUri);
            $this->doc .= $name;
        }
    }

    /**
     * Link to windows share
     *
     * @param string       $url  the link
     * @param string|array $name name for the link, array for media file
     */
    public function windowssharelink($url, $name = null) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            // _getLinkTitle() copes with media arrays; appending an array directly
            // would put the literal string "Array" into the abstract
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render a linked E-Mail Address
     *
     * @param string       $address Email-Address
     * @param string|array $name    name for the link, array for media file
     */
    public function emaillink($address, $name = null) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            // _getLinkTitle() copes with media arrays; appending an array directly
            // would put the literal string "Array" into the abstract
            $this->doc .= $this->_getLinkTitle($name, '<'.$address.'>');
        }
    }

    /**
     * Render an internal media file
     *
     * Adds the title (if any) to the abstract and records the media file as
     * first image candidate and as used media. The layout parameters are ignored.
     *
     * @param string $src     media ID
     * @param string $title   descriptive text
     * @param string $align   left|center|right
     * @param int    $width   width of media in pixel
     * @param int    $height  height of media in pixel
     * @param string $cache   cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function internalmedia($src, $title = null, $align = null, $width = null,
                                  $height = null, $cache = null, $linking = null) {
        if($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
        $this->_recordMediaUsage($src);
    }

    /**
     * Render an external media file
     *
     * Adds the title (if any) to the abstract and records the media file as
     * first image candidate. The layout parameters are ignored.
     *
     * @param string $src     full media URL
     * @param string $title   descriptive text
     * @param string $align   left|center|right
     * @param int    $width   width of media in pixel
     * @param int    $height  height of media in pixel
     * @param string $cache   cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function externalmedia($src, $title = null, $align = null, $width = null,
                                  $height = null, $cache = null, $linking = null) {
        if($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
    }

    /**
     * Render the output of an RSS feed
     *
     * Records the feed URL as part of the page and keeps the smallest refresh
     * value seen so far in $this->meta['date']['valid']['age'].
     *
     * @param string $url    URL of the feed
     * @param array  $params Finetuning of the output, 'refresh' is read here
     */
    public function rss($url, $params) {
        $this->meta['relation']['haspart'][$url] = true;

        $this->meta['date']['valid']['age'] =
            isset($this->meta['date']['valid']['age']) ?
                min($this->meta['date']['valid']['age'], $params['refresh']) :
                $params['refresh'];
    }

    #region Utils

    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $name
     *
     * @return mixed|string empty string when an array was passed
     */
    public function _simpleTitle($name) {
        global $conf;

        if(is_array($name)) return '';

        // a slash only separates namespaces when the useslash option is on
        if($conf['useslash']) {
            $nssep = '[:;/]';
        } else {
            $nssep = '[:;]';
        }
        $name = preg_replace('!.*'.$nssep.'!', '', $name);
        //if there is a hash we use the anchor name only
        $name = preg_replace('!.*#!', '', $name);
        return $name;
    }

    /**
     * Creates a linkid from a headline
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param string  $title   The headline title
     * @param boolean $create  Create a new unique ID?
     * @return string
     */
    public function _headerToLink($title, $create = false) {
        if($create) {
            // checked against the headers collected for this document
            return sectionID($title, $this->headers);
        }

        // passed as a variable like the original did; presumably sectionID()
        // takes this argument by reference - verify against its declaration
        $check = false;
        return sectionID($title, $check);
    }

    /**
     * Construct a title and handle images in titles
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param string|array|null $title    either string title or media array
     * @param string            $default  default title if nothing else is found
     * @param null|string       $id       linked page id (used to extract title from first heading)
     * @return string title text
     */
    public function _getLinkTitle($title, $default, $id = null) {
        if(is_array($title)) {
            // media used as link name: use its title in brackets when it has one
            // (!empty() also covers a missing 'title' key without raising a notice)
            if(!empty($title['title'])) {
                return '['.$title['title'].']';
            }
            return $default;
        }

        if(is_null($title) || trim($title) == '') {
            if(useHeading('content') && $id) {
                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
                if($heading) return $heading;
            }
            return $default;
        }

        return $title;
    }

    /**
     * Remember first image
     *
     * Only the first source ending in .jpg, .jpeg, .gif or .png (case
     * insensitive) is kept; later calls are ignored once one was found.
     *
     * @param string $src image URL or ID
     */
    public function _firstimage($src) {
        if($this->firstimage) return;
        global $ID;

        list($src) = explode('#', $src, 2);
        if(!media_isexternal($src)) {
            resolve_mediaid(getNS($ID), $src, $exists);
        }
        // the dot is escaped so that a real file extension is required;
        // unescaped it matched any character, e.g. "foojpg"
        if(preg_match('/\.(jpe?g|gif|png)$/i', $src)) {
            $this->firstimage = $src;
        }
    }

    /**
     * Store list of used media files in metadata
     *
     * External media is skipped. For everything else the resolved media ID is
     * stored in $this->meta['relation']['media'] together with its existence state.
     *
     * @param string $src media ID
     */
    public function _recordMediaUsage($src) {
        global $ID;

        list ($src) = explode('#', $src, 2);
        if(media_isexternal($src)) return;
        resolve_mediaid(getNS($ID), $src, $exists);
        $this->meta['relation']['media'][$src] = $exists;
    }

    /**
     * Record the media file of a link whose name is an image
     *
     * Shared by all link callbacks: when the link name is a media array, its
     * source is offered as first image and, for internal media, recorded as
     * used media. Non-array names are ignored.
     *
     * @param string|array|null $name name for the link, array for media file
     */
    protected function _recordLinkMedia($name) {
        if(!is_array($name)) return;

        $this->_firstimage($name['src']);
        if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
    }

    #endregion
}
693
694//Setup VIM: ex: et ts=4 :
695