xref: /dokuwiki/inc/parser/metadata.php (revision 91109d52e565c2a87aeee0650c7248472e54713a)
1<?php
2/**
3 * The MetaData Renderer
4 *
5 * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
6 * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
7 * $persistent.
8 *
9 * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
10 *
11 * @author Esther Brunner <wikidesign@gmail.com>
12 */
class Doku_Renderer_metadata extends Doku_Renderer {
    /** the approximate byte length to capture for the abstract */
    const ABSTRACT_LEN = 250;

    /** the maximum UTF8 character length for the abstract */
    const ABSTRACT_MAX = 500;

    /** @var array transient meta data, will be reset on each rendering */
    public $meta = array();

    /** @var array persistent meta data, will be kept until explicitly deleted */
    public $persistent = array();

    /** @var array the list of headers used to create unique link ids */
    protected $headers = array();

    /** @var string temporary $doc store, holds the abstract while a footnote is rendered */
    protected $store = '';

    /** @var string keeps the first image reference */
    protected $firstimage = '';

    /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
    public $capturing = true;

    /** @var bool true while more abstract data is wanted; cdata() sets it to false once enough was collected */
    public $capture = true;

    /** @var int number of bytes captured for abstract */
    protected $captured = 0;

    /**
     * Returns the format produced by this renderer.
     *
     * @return string always 'metadata'
     */
    public function getFormat() {
        return 'metadata';
    }

    /**
     * Initialize the document
     *
     * Sets up some of the persistent info about the page if it doesn't exist, yet,
     * and resets the transient metadata to the persistent values.
     */
    public function document_start() {
        global $ID;

        $this->headers = array();

        // external pages are missing create date
        // (empty() also covers a missing key without raising an undefined index notice)
        if(empty($this->persistent['date']['created'])) {
            $this->persistent['date']['created'] = filectime(wikiFN($ID));
        }
        if(!isset($this->persistent['user'])) {
            $this->persistent['user'] = '';
        }
        if(!isset($this->persistent['creator'])) {
            $this->persistent['creator'] = '';
        }
        // reset metadata to persistent values
        $this->meta = $this->persistent;
    }

    /**
     * Finalize the document
     *
     * Stores collected data (renderer info, abstract, first image, modification date)
     * in the metadata
     */
    public function document_end() {
        global $ID;

        // store internal info in metadata (notoc,nocache)
        $this->meta['internal'] = $this->info;

        if(!isset($this->meta['description']['abstract'])) {
            // cut off too long abstracts
            $this->doc = trim($this->doc);
            // the byte length is only a cheap pre-check: ABSTRACT_MAX counts characters, so
            // multibyte text may exceed it in bytes without needing to be shortened at all
            if(strlen($this->doc) > self::ABSTRACT_MAX) {
                $cut = utf8_substr($this->doc, 0, self::ABSTRACT_MAX);
                // only mark the abstract as truncated when something was really removed
                if($cut !== $this->doc) {
                    $this->doc = $cut.'…';
                }
            }
            $this->meta['description']['abstract'] = $this->doc;
        }

        $this->meta['relation']['firstimage'] = $this->firstimage;

        if(!isset($this->meta['date']['modified'])) {
            $this->meta['date']['modified'] = filemtime(wikiFN($ID));
        }

    }

    /**
     * Render plain text data
     *
     * This function takes care of the amount captured data and will stop capturing when
     * enough abstract data is available
     *
     * @param string $text the text to add to the abstract
     */
    public function cdata($text) {
        if(!$this->capture || !$this->capturing) return;

        $this->doc .= $text;

        // ABSTRACT_LEN is a byte limit, so the plain byte length is counted here
        $this->captured += strlen($text);
        if($this->captured > self::ABSTRACT_LEN) $this->capture = false;
    }

    /**
     * Add an item to the TOC
     *
     * @param string $id       the hash link
     * @param string $text     the text to display
     * @param int    $level    the nesting level
     */
    public function toc_additem($id, $text, $level) {
        global $conf;

        //only add items within configured levels
        if($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
            // the TOC is one of our standard ul list arrays ;-)
            $this->meta['description']['tableofcontents'][] = array(
                'hid'   => $id,
                'title' => $text,
                'type'  => 'ul',
                'level' => $level - $conf['toptoclevel'] + 1
            );
        }

    }

    /**
     * Render a heading
     *
     * The first heading encountered becomes the page title unless a title is already set.
     *
     * @param string $text  the text to display
     * @param int    $level header level
     * @param int    $pos   byte position in the original source
     */
    public function header($text, $level, $pos) {
        if(!isset($this->meta['title'])) $this->meta['title'] = $text;

        // add the header to the TOC
        $hid = $this->_headerToLink($text, true);
        $this->toc_additem($hid, $text, $level);

        // add to summary
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Open a paragraph
     */
    public function p_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Close a paragraph
     */
    public function p_close() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a line break
     */
    public function linebreak() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Create a horizontal line
     */
    public function hr() {
        $this->cdata(DOKU_LF.'----------'.DOKU_LF);
    }

    /**
     * Callback for footnote start syntax
     *
     * All following content will go to the footnote instead of
     * the document. To achieve this the previous rendered content
     * is moved to $store and $doc is cleared
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    public function footnote_open() {
        if($this->capture) {
            // move current content to store
            // this is required to ensure safe behaviour of plugins accessed within footnotes
            $this->store = $this->doc;
            $this->doc   = '';

            // disable capturing
            $this->capturing = false;
        }
    }

    /**
     * Callback for footnote end syntax
     *
     * All content rendered whilst within footnote syntax mode is discarded,
     * the previously rendered content is restored and capturing is re-enabled.
     *
     * @author Andreas Gohr
     */
    public function footnote_close() {
        if($this->capture) {
            // re-enable capturing
            $this->capturing = true;
            // restore previously rendered content
            $this->doc   = $this->store;
            $this->store = '';
        }
    }

    /**
     * Open an unordered list
     */
    public function listu_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open an ordered list
     */
    public function listo_open() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Open a list item
     *
     * @param int $level the nesting level
     * @param bool $node true when a node; false when a leaf
     */
    public function listitem_open($level,$node=false) {
        $this->cdata(str_repeat(DOKU_TAB, $level).'* ');
    }

    /**
     * Close a list item
     */
    public function listitem_close() {
        $this->cdata(DOKU_LF);
    }

    /**
     * Output preformatted text
     *
     * @param string $text
     */
    public function preformatted($text) {
        $this->cdata($text);
    }

    /**
     * Start a block quote
     */
    public function quote_open() {
        $this->cdata(DOKU_LF.DOKU_TAB.'"');
    }

    /**
     * Stop a block quote
     */
    public function quote_close() {
        $this->cdata('"'.DOKU_LF);
    }

    /**
     * Add file content to the abstract, no highlighting is done
     *
     * @param string $text text to show
     * @param string $lang programming language (unused here)
     * @param string $file file path label (unused here)
     */
    public function file($text, $lang = null, $file = null) {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Add code content to the abstract, no highlighting is done
     *
     * @param string $text     text to show
     * @param string $language programming language (unused here)
     * @param string $file     file path label (unused here)
     */
    public function code($text, $language = null, $file = null) {
        $this->cdata(DOKU_LF.$text.DOKU_LF);
    }

    /**
     * Format an acronym
     *
     * The acronym is added to the abstract as is
     *
     * @param string $acronym
     */
    public function acronym($acronym) {
        $this->cdata($acronym);
    }

    /**
     * Format a smiley
     *
     * The smiley is added to the abstract as is
     *
     * @param string $smiley
     */
    public function smiley($smiley) {
        $this->cdata($smiley);
    }

    /**
     * Format an entity
     *
     * Entities are basically small text replacements, here the entity
     * is added to the abstract as is
     *
     * @param string $entity
     */
    public function entity($entity) {
        $this->cdata($entity);
    }

    /**
     * Typographically format a multiply sign
     *
     * Example: ($x=640, $y=480) should result in "640×480"
     *
     * @param string|int $x first value
     * @param string|int $y second value
     */
    public function multiplyentity($x, $y) {
        $this->cdata($x.'×'.$y);
    }

    /**
     * Render an opening single quote char (language specific)
     */
    public function singlequoteopening() {
        global $lang;
        $this->cdata($lang['singlequoteopening']);
    }

    /**
     * Render a closing single quote char (language specific)
     */
    public function singlequoteclosing() {
        global $lang;
        $this->cdata($lang['singlequoteclosing']);
    }

    /**
     * Render an apostrophe char (language specific)
     */
    public function apostrophe() {
        global $lang;
        $this->cdata($lang['apostrophe']);
    }

    /**
     * Render an opening double quote char (language specific)
     */
    public function doublequoteopening() {
        global $lang;
        $this->cdata($lang['doublequoteopening']);
    }

    /**
     * Render a closing double quote char (language specific)
     */
    public function doublequoteclosing() {
        global $lang;
        $this->cdata($lang['doublequoteclosing']);
    }

    /**
     * Render a CamelCase link
     *
     * @param string $link The link name
     * @see http://en.wikipedia.org/wiki/CamelCase
     */
    public function camelcaselink($link) {
        $this->internallink($link, $link);
    }

    /**
     * Render a page local link
     *
     * Nothing is added to the abstract, only media used as link title is tracked
     *
     * @param string            $hash hash link identifier
     * @param string|array|null $name name for the link, array for media file
     */
    public function locallink($hash, $name = null) {
        $this->_trackTitleMedia($name);
    }

    /**
     * keep track of internal links in $this->meta['relation']['references']
     *
     * @param string            $id   page ID to link to. eg. 'wiki:syntax'
     * @param string|array|null $name name for the link, array for media file
     */
    public function internallink($id, $name = null) {
        global $ID;

        $this->_trackTitleMedia($name);

        // drop any query string from the link target
        list($id) = explode('?', $id, 2);

        $default = $this->_simpleTitle($id);

        // first resolve and clean up the $id
        resolve_pageid(getNS($ID), $id, $exists);
        // explode() always returns at least one element, no error suppression needed
        list($page) = explode('#', $id, 2);

        // set metadata
        $this->meta['relation']['references'][$page] = $exists;

        // add link title to summary
        if($this->capture) {
            $name = $this->_getLinkTitle($name, $default, $id);
            $this->doc .= $name;
        }
    }

    /**
     * Render an external link
     *
     * @param string            $url  full URL with scheme
     * @param string|array|null $name name for the link, array for media file
     */
    public function externallink($url, $name = null) {
        $this->_trackTitleMedia($name);

        if($this->capture) {
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render an interwiki link
     *
     * @param string       $match     original link - not used here
     * @param string|array $name      name for the link, array for media file
     * @param string       $wikiName  identifier (shortcut) for the remote wiki
     * @param string       $wikiUri   the fragment parsed from the original link
     */
    public function interwikilink($match, $name, $wikiName, $wikiUri) {
        $this->_trackTitleMedia($name);

        if($this->capture) {
            // the part before any '#' is the fallback title
            list($wikiUri) = explode('#', $wikiUri, 2);
            $name = $this->_getLinkTitle($name, $wikiUri);
            $this->doc .= $name;
        }
    }

    /**
     * Link to windows share
     *
     * @param string            $url  the link
     * @param string|array|null $name name for the link, array for media file
     */
    public function windowssharelink($url, $name = null) {
        $this->_trackTitleMedia($name);

        if($this->capture) {
            // go through _getLinkTitle() so a media array is never concatenated as a string
            $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
        }
    }

    /**
     * Render a linked E-Mail Address
     *
     * Without a name the address itself is added to the abstract as '<address>'
     *
     * @param string            $address Email-Address
     * @param string|array|null $name    name for the link, array for media file
     */
    public function emaillink($address, $name = null) {
        $this->_trackTitleMedia($name);

        if($this->capture) {
            // go through _getLinkTitle() so a media array is never concatenated as a string
            $this->doc .= $this->_getLinkTitle($name, '<'.$address.'>');
        }
    }

    /**
     * Render an internal media file
     *
     * Adds the title to the abstract and records the media file as first image
     * candidate and as used media
     *
     * @param string $src     media ID
     * @param string $title   descriptive text
     * @param string $align   left|center|right
     * @param int    $width   width of media in pixel
     * @param int    $height  height of media in pixel
     * @param string $cache   cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function internalmedia($src, $title = null, $align = null, $width = null,
                           $height = null, $cache = null, $linking = null) {
        if($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
        $this->_recordMediaUsage($src);
    }

    /**
     * Render an external media file
     *
     * Adds the title to the abstract and records the file as first image candidate
     *
     * @param string $src     full media URL
     * @param string $title   descriptive text
     * @param string $align   left|center|right
     * @param int    $width   width of media in pixel
     * @param int    $height  height of media in pixel
     * @param string $cache   cache|recache|nocache
     * @param string $linking linkonly|detail|nolink
     */
    public function externalmedia($src, $title = null, $align = null, $width = null,
                           $height = null, $cache = null, $linking = null) {
        if($this->capture && $title) $this->doc .= '['.$title.']';
        $this->_firstimage($src);
    }

    /**
     * Record an RSS feed as part of the page
     *
     * The feed URL is stored in $this->meta['relation']['haspart'] and the smallest
     * refresh value of all feeds is kept in $this->meta['date']['valid']['age']
     *
     * @param string $url    URL of the feed
     * @param array  $params Finetuning of the output, 'refresh' is read here
     */
    public function rss($url, $params) {
        $this->meta['relation']['haspart'][$url] = true;

        $this->meta['date']['valid']['age'] =
            isset($this->meta['date']['valid']['age']) ?
                min($this->meta['date']['valid']['age'], $params['refresh']) :
                $params['refresh'];
    }

    #region Utils

    /**
     * Removes any Namespace from the given name but keeps
     * casing and special chars
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     *
     * @param string|array $name link target, an array yields an empty title
     *
     * @return string
     */
    public function _simpleTitle($name) {
        global $conf;

        if(is_array($name)) return '';

        if($conf['useslash']) {
            $nssep = '[:;/]';
        } else {
            $nssep = '[:;]';
        }
        $name = preg_replace('!.*'.$nssep.'!', '', $name);
        //if there is a hash we use the anchor name only
        $name = preg_replace('!.*#!', '', $name);
        return $name;
    }

    /**
     * Construct a title and handle images in titles
     *
     * @author Harry Fuecks <hfuecks@gmail.com>
     * @param string|array|null $title    either string title or media array
     * @param string            $default  default title if nothing else is found
     * @param null|string       $id       linked page id (used to extract title from first heading)
     * @return string title text
     */
    public function _getLinkTitle($title, $default, $id = null) {
        if(is_array($title)) {
            // media arrays without a (non-empty) title fall back to the default
            if(!empty($title['title'])) {
                return '['.$title['title'].']';
            } else {
                return $default;
            }
        } else if(is_null($title) || trim($title) == '') {
            if(useHeading('content') && $id) {
                $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
                if($heading) return $heading;
            }
            return $default;
        } else {
            return $title;
        }
    }

    /**
     * Track a media file that is used as link title
     *
     * Does nothing unless the given link name is a media array. The media is offered
     * as first image and, for internal media, recorded as used media.
     *
     * @param string|array|null $name name for the link, array for media file
     */
    protected function _trackTitleMedia($name) {
        if(!is_array($name)) return;

        $this->_firstimage($name['src']);
        if($name['type'] == 'internalmedia') $this->_recordMediaUsage($name['src']);
    }

    /**
     * Remember first image
     *
     * Only the first source with a jpg, jpeg, gif or png file extension is kept
     *
     * @param string $src image URL or ID
     */
    protected function _firstimage($src) {
        if($this->firstimage) return;
        global $ID;

        list($src) = explode('#', $src, 2);
        if(!media_isexternal($src)) {
            resolve_mediaid(getNS($ID), $src, $exists);
        }
        // the dot is escaped so that only a real file extension matches
        if(preg_match('/\.(jpe?g|gif|png)$/i', $src)) {
            $this->firstimage = $src;
        }
    }

    /**
     * Store list of used media files in metadata
     *
     * External media is ignored
     *
     * @param string $src media ID
     */
    protected function _recordMediaUsage($src) {
        global $ID;

        list ($src) = explode('#', $src, 2);
        if(media_isexternal($src)) return;
        resolve_mediaid(getNS($ID), $src, $exists);
        $this->meta['relation']['media'][$src] = $exists;
    }

    #endregion
}
670
671//Setup VIM: ex: et ts=4 :
672