<?php
/**
 * Renderer for metadata
 *
 * xref: /dokuwiki/inc/parser/metadata.php (revision 4809b2366a43391c8b26715aea8619206f0db515)
 *
 * @author Esther Brunner <wikidesign@gmail.com>
 */

// refuse to run unless DOKU_INC has already been defined by the including script
if(!defined('DOKU_INC')) die('meh.');

if(!defined('DOKU_LF')) {
    // Some whitespace to help View > Source
    define ('DOKU_LF', "\n");
}

if(!defined('DOKU_TAB')) {
    // Some whitespace to help View > Source
    define ('DOKU_TAB', "\t");
}
18
/**
 * The MetaData Renderer
 *
 * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
 * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
 * $persistent.
 *
 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
 */
class Doku_Renderer_metadata extends Doku_Renderer {
    /** the approximate byte length to capture for the abstract */
    const ABSTRACT_LEN = 250;

    /** the maximum UTF8 character length for the abstract */
    const ABSTRACT_MAX = 500;

    /** @var array transient meta data, will be reset on each rendering */
    public $meta = array();

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = array();

    /** @var array the list of headers used to create unique link ids */
    protected $headers = array();

    /** @var string temporary $doc store, holds the abstract text while a footnote is rendered */
    protected $store = '';

    /** @var string keeps the first image reference */
    protected $firstimage = '';

    /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
    public $capturing = true;

    /** @var bool true as long as more data is needed for the abstract, false once enough was collected */
    public $capture = true;

    /** @var int number of bytes captured for abstract */
    protected $captured = 0;

    /**
     * Returns the format produced by this renderer.
     *
     * @return string always 'metadata'
     */
    public function getFormat() {
        return 'metadata';
    }

    /**
     * Initialize the document
     *
     * Sets up some of the persistent info about the page if it doesn't exist, yet,
     * then resets the transient metadata to the persistent values.
     */
    public function document_start() {
        global $ID;

        $this->headers = array();

        // external pages are missing create date
        // empty() avoids an undefined index notice when no date was stored at all
        if(empty($this->persistent['date']['created'])) {
            $this->persistent['date']['created'] = filectime(wikiFN($ID));
        }
        if(!isset($this->persistent['user'])) {
            $this->persistent['user'] = '';
        }
        if(!isset($this->persistent['creator'])) {
            $this->persistent['creator'] = '';
        }
        // reset metadata to persistent values
        $this->meta = $this->persistent;
    }

    /**
     * Finalize the document
     *
     * Stores collected data in the metadata: the renderer info, the abstract (unless one
     * was already set), the first image and the modification date (unless already set).
     */
    public function document_end() {
        global $ID;

        // store internal info in metadata (notoc,nocache)
        $this->meta['internal'] = $this->info;

        if(!isset($this->meta['description']['abstract'])) {
            // cut off too long abstracts
            $this->doc = trim($this->doc);
            // ABSTRACT_MAX is a character limit, so compare characters and not bytes;
            // otherwise multibyte text gets an ellipsis although nothing was cut
            if(utf8_strlen($this->doc) > self::ABSTRACT_MAX) {
                $this->doc = utf8_substr($this->doc, 0, self::ABSTRACT_MAX).'…';
            }
            $this->meta['description']['abstract'] = $this->doc;
        }

        $this->meta['relation']['firstimage'] = $this->firstimage;

        if(!isset($this->meta['date']['modified'])) {
            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
        }

    }

    /**
     * Render plain text data
     *
     * This function takes care of the amount captured data and will stop capturing when
     * enough abstract data is available
     *
     * @param string $text the text to append to the abstract
     */
    public function cdata($text) {
        if(!$this->capture || !$this->capturing) return;

        $this->doc .= $text;

        // the limit is checked after appending, so the abstract may exceed ABSTRACT_LEN bytes
        $this->captured += strlen($text);
        if($this->captured > self::ABSTRACT_LEN) $this->capture = false;
    }

    /**
     * Add an item to the TOC
     *
     * @param string $id       the hash link
     * @param string $text     the text to display
     * @param int    $level    the nesting level
     */
    public function toc_additem($id, $text, $level) {
        global $conf;

        //only add items within configured levels
        if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
            // the TOC is one of our standard ul list arrays ;-)
            $this->meta['description']['tableofcontents'][] = array(
                'hid'   => $id,
                'title' => $text,
                'type'  => 'ul',
                // levels are stored relative to the configured top level
                'level' => $level - $conf['toptoclevel'] + 1
            );
        }

    }

    /**
     * Render a heading
     *
     * The first heading becomes the page title if no title is set, yet.
     *
     * @param string $text  the text to display
     * @param int    $level header level
     * @param int    $pos   byte position in the original source
     */
    public function header($text, $level, $pos) {
        if(!isset($this->meta['title'])) $this->meta['title'] = $text;

        // add the header to the TOC
        $hid = $this->_headerToLink($text, true);
        $this->toc_additem($hid, $text, $level);

        // add to summary
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Open a paragraph
     */
    public function p_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Close a paragraph
     */
    public function p_close() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a line break
     */
    public function linebreak() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a horizontal line
     */
    public function hr() {
        $this->cdata(DOKU_LF.'----------'.DOKU_LF);
    }

    /**
     * Callback for footnote start syntax
     *
     * All following content will go to the footnote instead of
     * the document. To achieve this the previous rendered content
     * is moved to $store and $doc is cleared
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function footnote_open() {
        if($this->capture) {
            // move current content to store
            // this is required to ensure safe behaviour of plugins accessed within footnotes
            $this->store = $this->doc;
            $this->doc   = '';

            // disable capturing
            $this->capturing = false;
        }
    }

    /**
     * Callback for footnote end syntax
     *
     * All content rendered whilst within footnote syntax mode is discarded,
     * the previously rendered content is restored and capturing is re-enabled.
     *
     * @author Andreas Gohr
     */
    public function footnote_close() {
        if($this->capture) {
            // re-enable capturing
            $this->capturing = true;
            // restore previously rendered content
            $this->doc   = $this->store;
            $this->store = '';
        }
    }

    /**
     * Open an unordered list
     */
    public function listu_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open an ordered list
     */
    public function listo_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open a list item
     *
     * @param int $level the nesting level
     * @param bool $node true when a node; false when a leaf
     */
    public function listitem_open($level,$node=false) {
        $this->cdata(str_repeat(DOKU_TAB, $level).'* ');
    }

    /**
     * Close a list item
     */
    public function listitem_close() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Output preformatted text
     *
     * @param string $text
     */
    public function preformatted($text) {
        $this->cdata($text);
    }

    /**
     * Start a block quote
     */
    public function quote_open() {
        $this->cdata(DOKU_LF.DOKU_TAB.'"');
    }

    /**
     * Stop a block quote
     */
    public function quote_close() {
        $this->cdata('"'.DOKU_LF);
    }

    /**
     * Display text as file content, optionally syntax highlighted
     *
     * Only the plain text is added to the abstract, $lang and $file are not used here.
     *
     * @param string $text text to show
     * @param string $lang programming language to use for syntax highlighting
     * @param string $file file path label
     */
    public function file($text, $lang = null, $file = null) {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Display text as code content, optionally syntax highlighted
     *
     * Only the plain text is added to the abstract, $language and $file are not used here.
     *
     * @param string $text     text to show
     * @param string $language programming language to use for syntax highlighting
     * @param string $file     file path label
     */
    public function code($text, $language = null, $file = null) {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Format an acronym
     *
     * The acronym is added to the abstract as given.
     *
     * @param string $acronym
     */
    public function acronym($acronym) {
        $this->cdata($acronym);
    }

    /**
     * Format a smiley
     *
     * The smiley text is added to the abstract as given.
     *
     * @param string $smiley
     */
    public function smiley($smiley) {
        $this->cdata($smiley);
    }

    /**
     * Format an entity
     *
     * Entities are basically small text replacements, but the abstract
     * receives the entity text as given.
     *
     * @param string $entity
     */
    public function entity($entity) {
        $this->cdata($entity);
    }

    /**
     * Typographically format a multiply sign
     *
     * Example: ($x=640, $y=480) should result in "640×480"
     *
     * @param string|int $x first value
     * @param string|int $y second value
     */
    public function multiplyentity($x, $y) {
        $this->cdata($x.'×'.$y);
    }

    /**
     * Render an opening single quote char (language specific)
     */
    public function singlequoteopening() {
        global $lang;
        $this->cdata($lang['singlequoteopening']);
    }

    /**
     * Render a closing single quote char (language specific)
     */
    public function singlequoteclosing() {
        global $lang;
        $this->cdata($lang['singlequoteclosing']);
    }

    /**
     * Render an apostrophe char (language specific)
     */
    public function apostrophe() {
        global $lang;
        $this->cdata($lang['apostrophe']);
    }

    /**
     * Render an opening double quote char (language specific)
     */
    public function doublequoteopening() {
        global $lang;
        $this->cdata($lang['doublequoteopening']);
    }

    /**
     * Render a closing double quote char (language specific)
     */
    public function doublequoteclosing() {
        global $lang;
        $this->cdata($lang['doublequoteclosing']);
    }

    /**
     * Render a CamelCase link
     *
     * Handled like an internal link that uses the link name as its title.
     *
     * @param string $link The link name
     * @see http://en.wikipedia.org/wiki/CamelCase
     */
    public function camelcaselink($link) {
        $this->internallink($link, $link);
    }

    /**
     * Render a page local link
     *
     * Nothing is added to the abstract; only a media file used as link name is recorded.
     *
     * @param string            $hash hash link identifier
     * @param string|array|null $name name for the link, array for media file
     */
    public function locallink($hash, $name = null) {
        $this->_recordLinkMedia($name);
    }

    /**
     * keep track of internal links in $this->meta['relation']['references']
     *
     * @param string            $id   page ID to link to. eg. 'wiki:syntax'
     * @param string|array|null $name name for the link, array for media file
     */
    public function internallink($id, $name = null) {
        global $ID;

        $this->_recordLinkMedia($name);

        // drop a query string from the link target
        $parts = explode('?', $id, 2);
        if(count($parts) === 2) {
            $id = $parts[0];
        }

        // the fallback title is derived from the ID as written, before it gets resolved
        $default = $this->_simpleTitle($id);

        // first resolve and clean up the $id
        resolve_pageid(getNS($ID), $id, $exists);
        @list($page) = explode('#', $id, 2);

        // set metadata
        $this->meta['relation']['references'][$page] = $exists;
        // $data = array('relation' => array('isreferencedby' => array($ID => true)));
        // p_set_metadata($id, $data);

        // add link title to summary
        if($this->capture) {
            $name = $this->_getLinkTitle($name, $default, $id);
            $this->doc .= $name;
        }
    }

    /**
     * Render an external link
     *
     * @param string            $url  full URL with scheme
     * @param string|array|null $name name for the link, array for media file
     */
    public function externallink($url, $name = null) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render an interwiki link
     *
     * You may want to use $this->_resolveInterWiki() here
     *
     * The parameter list is left untouched (including the default on $name) so the
     * declaration stays in line with the renderer base class.
     *
     * @param string       $match     original link - probably not much use
     * @param string|array $name      name for the link, array for media file
     * @param string       $wikiName  identifier (shortcut) for the remote wiki
     * @param string       $wikiUri   the fragment parsed from the original link
     */
    public function interwikilink($match, $name = null, $wikiName, $wikiUri) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            // the part before a '#' is used as fallback title
            list($wikiUri) = explode('#', $wikiUri, 2);
            $name = $this->_getLinkTitle($name, $wikiUri);
            $this->doc .= $name;
        }
    }

    /**
     * Link to windows share
     *
     * @param string            $url  the link
     * @param string|array|null $name name for the link, array for media file
     */
    public function windowssharelink($url, $name = null) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            // _getLinkTitle() copes with media arrays, appending $name directly would add "Array"
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render a linked E-Mail Address
     *
     * Should honor $conf['mailguard'] setting
     *
     * @param string            $address Email-Address
     * @param string|array|null $name    name for the link, array for media file
     */
    public function emaillink($address, $name = null) {
        $this->_recordLinkMedia($name);

        if($this->capture) {
            // _getLinkTitle() copes with media arrays, appending $name directly would add "Array"
            $this->doc .= $this->_getLinkTitle($name, '<'.$address.'>');
        }
    }

    /**
     * Render an internal media file
     *
     * The title (if any) is added to the abstract in square brackets; the file is
     * considered as first image and recorded as used media.
     *
     * @param string $src     media ID
     * @param string $title   descriptive text
     * @param string $align   left|center|right
     * @param int    $width   width of media in pixel
     * @param int    $height  height of media in pixel
     * @param string $cache   cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function internalmedia($src, $title = null, $align = null, $width = null,
                                  $height = null, $cache = null, $linking = null) {
        if($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
        $this->_recordMediaUsage($src);
    }

    /**
     * Render an external media file
     *
     * The title (if any) is added to the abstract in square brackets; the file is
     * considered as first image.
     *
     * @param string $src     full media URL
     * @param string $title   descriptive text
     * @param string $align   left|center|right
     * @param int    $width   width of media in pixel
     * @param int    $height  height of media in pixel
     * @param string $cache   cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function externalmedia($src, $title = null, $align = null, $width = null,
                                  $height = null, $cache = null, $linking = null) {
        if($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
    }

    /**
     * Render the output of an RSS feed
     *
     * Records the feed as part of the page and keeps the smallest refresh
     * interval of all feeds as the page's valid age.
     *
     * @param string $url    URL of the feed
     * @param array  $params Finetuning of the output
     */
    public function rss($url, $params) {
        $this->meta['relation']['haspart'][$url] = true;

        $age = $params['refresh'];
        if(isset($this->meta['date']['valid']['age'])) {
            $age = min($this->meta['date']['valid']['age'], $age);
        }
        $this->meta['date']['valid']['age'] = $age;
    }

    #region Utils

    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string $name
     *
     * @return mixed|string empty string when an array is given
     */
    public function _simpleTitle($name) {
        global $conf;

        if(is_array($name)) return '';

        // a slash only counts as namespace separator when configured
        if($conf['useslash']) {
            $nssep = '[:;/]';
        } else {
            $nssep = '[:;]';
        }
        $name = preg_replace('!.*'.$nssep.'!', '', $name);
        //if there is a hash we use the anchor name only
        $name = preg_replace('!.*#!', '', $name);
        return $name;
    }

    /**
     * Creates a linkid from a headline
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     * @param string  $title   The headline title
     * @param boolean $create  Create a new unique ID?
     * @return string
     */
    public function _headerToLink($title, $create = false) {
        if($create) {
            // checked against the headers seen so far
            return sectionID($title, $this->headers);
        } else {
            $check = false;
            return sectionID($title, $check);
        }
    }

    /**
     * Construct a title and handle images in titles
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param string|array|null $title    either string title or media array
     * @param string            $default  default title if nothing else is found
     * @param null|string       $id       linked page id (used to extract title from first heading)
     * @return string title text
     */
    public function _getLinkTitle($title, $default, $id = null) {
        if(is_array($title)) {
            // media used as link name: use its title in brackets when there is one
            if(!empty($title['title'])) {
                return '['.$title['title'].']';
            } else {
                return $default;
            }
        } else if(is_null($title) || trim($title) == '') {
            if(useHeading('content') && $id) {
                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
                if($heading) return $heading;
            }
            return $default;
        } else {
            return $title;
        }
    }

    /**
     * Remember first image
     *
     * Only the first source ending in .jpg, .jpeg, .gif or .png is kept.
     *
     * @param string $src image URL or ID
     */
    public function _firstimage($src) {
        if($this->firstimage) return;
        global $ID;

        list($src) = explode('#', $src, 2);
        if(!media_isexternal($src)) {
            resolve_mediaid(getNS($ID), $src, $exists);
        }
        // the dot is escaped so only a real file extension matches
        if(preg_match('/\.(jpe?g|gif|png)$/i', $src)) {
            $this->firstimage = $src;
        }
    }

    /**
     * Store list of used media files in metadata
     *
     * External media is ignored.
     *
     * @param string $src media ID
     */
    public function _recordMediaUsage($src) {
        global $ID;

        list ($src) = explode('#', $src, 2);
        if(media_isexternal($src)) return;
        resolve_mediaid(getNS($ID), $src, $exists);
        $this->meta['relation']['media'][$src] = $exists;
    }

    /**
     * Handle a media file that is used as the name of a link
     *
     * Does nothing unless the link name is a media array. Otherwise the media is
     * considered as first image and, for internal media, recorded as used media.
     *
     * @param string|array|null $name name for the link, array for media file
     */
    protected function _recordLinkMedia($name) {
        if(!is_array($name)) return;

        $this->_firstimage($name['src']);
        if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
    }

    #endregion
}
702
//Setup VIM: ex: et ts=4 :