xref: /dokuwiki/inc/parserutils.php (revision 4a81940267e4278153d3726b605286fd963084ec)
1<?php
2/**
3 * Utilities for accessing the parser
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Harry Fuecks <hfuecks@gmail.com>
7 * @author     Andreas Gohr <andi@splitbrain.org>
8 */
9
// Abort immediately unless the DOKU_INC constant has been defined.
if (!defined('DOKU_INC')) {
    die('meh.');
}
11
/**
 * Returns the parsed Wikitext in XHTML for the given id and revision.
 *
 * While rendering, the global $ID is switched to $id and restored before
 * returning. Old revisions are always rendered from scratch; the current
 * revision goes through p_cached_output().
 *
 * @param string $id     page id
 * @param string $rev    revision, empty for the current one
 * @param bool   $excuse if true, the localized 'norev' / 'newpage' text is
 *                       returned when the file wasn't found
 * @return string the rendered output, '' if the file is missing and no excuse was requested
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_wiki_xhtml($id, $rev='', $excuse=true){
    global $ID;

    $file = wikiFN($id,$rev);

    // ensure $id is in global $ID (needed for parsing)
    $keep = $ID;
    $ID   = $id;

    $ret = '';
    if(@file_exists($file)){
        if($rev){
            // no caching on old revisions
            $text = io_readWikiPage($file,$id,$rev);
            $ret  = p_render('xhtml',p_get_instructions($text),$info);
        }else{
            $ret  = p_cached_output($file,'xhtml',$id);
        }
    }elseif($excuse){
        $ret = p_locale_xhtml($rev ? 'norev' : 'newpage');
    }

    // restore ID (just in case)
    $ID = $keep;

    return $ret;
}
48
/**
 * Returns starting summary for a page (e.g. the first few
 * paragraphs), marked up in XHTML.
 *
 * If $excuse is true an explanation is returned if the file
 * wasn't found. In that case $title is left untouched.
 *
 * While rendering, the global $ID is switched to $id and restored before
 * returning.
 *
 * @param string $id     wiki page id
 * @param string $title  reference populated with the page title reported by the
 *                       renderer ('sum_pagetitle'), or with the page id if none
 * @param string $rev    revision, empty for the current one
 * @param bool   $excuse return the localized 'norev' / 'newpage' text for a missing file
 * @return string the rendered summary, '' if the file is missing and no excuse was requested
 *
 * @deprecated
 * @author Harry Fuecks <hfuecks@gmail.com>
 */
function p_wiki_xhtml_summary($id, &$title, $rev='', $excuse=true){
    $file = wikiFN($id,$rev);

    // defaults for the "file missing, no excuse wanted" path, where neither
    // would otherwise be defined before being used below
    $ins  = null;
    $info = array();

    //ensure $id is in global $ID (needed for parsing)
    global $ID;
    $keep = $ID;
    $ID   = $id;

    if(@file_exists($file)){
        if($rev){
            //no caching on old revisions
            $ins = p_get_instructions(io_readWikiPage($file,$id,$rev));
        }else{
            // The XHTML for a summary is not cached so use the instruction cache
            $ins = p_cached_instructions($file);
        }
    }elseif($excuse){
        $ret = p_locale_xhtml($rev ? 'norev' : 'newpage');
        //restore ID (just in case)
        $ID = $keep;
        return $ret;
    }

    // p_render() returns '' for null instructions and then leaves $info alone
    $ret = p_render('xhtmlsummary',$ins,$info);

    $title = !empty($info['sum_pagetitle']) ? $info['sum_pagetitle'] : $id;

    $ID = $keep;
    return $ret;
}
105
/**
 * Returns the specified local text in parsed format
 *
 * The file is resolved with localeFN() and rendered through p_cached_output()
 * using that function's default output format.
 *
 * @param string $id name of the locale text, as accepted by localeFN()
 * @return string the parsed text
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_locale_xhtml($id){
    return p_cached_output(localeFN($id));
}
116
/**
 *     *** DEPRECATED ***
 *
 * use p_cached_output()
 *
 * Returns the given file parsed to XHTML by delegating to p_cached_output()
 * with its default arguments.
 *
 * @param string $file path of the file to parse
 * @return string the parsed output
 *
 * @deprecated
 * @author Andreas Gohr <andi@splitbrain.org>
 * @todo   rewrite to use mode instead of hardcoded XHTML
 */
function p_cached_xhtml($file){
    $xhtml = p_cached_output($file);
    return $xhtml;
}
133
/**
 * Returns the given file parsed into the requested output format
 *
 * A valid renderer cache is returned as is. Otherwise the file is rendered
 * from its (cached) instructions; the result is stored in the cache when the
 * renderer info allows caching and the cache file is removed when it does not.
 * With $conf['allowdebug'] set and the 'xhtml' format, an HTML comment
 * describing what happened is appended to the output.
 *
 * @param string $file   path of the file to render
 * @param string $format output format / renderer mode
 * @param string $id     page id handed to the cache and the instruction lookup
 * @return string the rendered output
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Chris Smith <chris@jalakai.co.uk>
 */
function p_cached_output($file, $format='xhtml', $id='') {
    global $conf;

    $cache = new cache_renderer($id, $file, $format);

    // remember which path was taken; the debug comment is added at the end
    if ($cache->useCache()) {
        $parsed = $cache->retrieveCache(false);
        $status = 'used';
    } else {
        $parsed = p_render($format, p_cached_instructions($file,false,$id), $info);

        if ($info['cache']) {
            //save cachefile
            $cache->storeCache($parsed);
            $status = 'created';
        }else{
            //try to delete cachefile
            $cache->removeCache();
            $status = 'forbidden';
        }
    }

    if($conf['allowdebug'] && $format=='xhtml'){
        switch($status){
            case 'used':
                $parsed .= "\n<!-- cachefile {$cache->cache} used -->\n";
                break;
            case 'created':
                $parsed .= "\n<!-- no cachefile used, but created {$cache->cache} -->\n";
                break;
            default:
                $parsed .= "\n<!-- no cachefile used, caching forbidden -->\n";
        }
    }

    return $parsed;
}
161
/**
 * Returns the render instructions for a file
 *
 * Uses and creates a serialized cache file
 *
 * @param string $file      path of the wiki file
 * @param bool   $cacheonly if true, whatever the cache holds is returned and
 *                          nothing is rebuilt
 * @param string $id        page id handed to the cache and the page reader
 * @return mixed the instructions, or null if they are not served from the
 *               cache and the file does not exist
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_cached_instructions($file,$cacheonly=false,$id='') {
    // files whose instructions were rebuilt and stored during this run
    static $run = array();

    $cache = new cache_instructions($id, $file);

    if ($cacheonly || $cache->useCache() || isset($run[$file])) {
        return $cache->retrieveCache();
    }

    if (!@file_exists($file)) {
        return null;
    }

    // no cache - do some work
    $ins = p_get_instructions(io_readWikiPage($file,$id));
    if (!$cache->storeCache($ins)) {
        msg('Unable to save cache file. Hint: disk full; file permissions; safe_mode setting.',-1);
    } else {
        // we won't rebuild these instructions in the same run again
        $run[$file] = true;
    }

    return $ins;
}
191
/**
 * turns a page into a list of instructions
 *
 * The PARSER_WIKITEXT_PREPROCESS event is triggered with the text right
 * before it is parsed.
 *
 * @param string $text raw wiki text
 * @return mixed the result of Doku_Parser::parse()
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_get_instructions($text){

    // fetch the modes before touching the parser classes: p_get_parsermodes()
    // is what includes inc/parser/parser.php
    $modeList = p_get_parsermodes();

    // create the parser and attach a fresh handler
    $parser = new Doku_Parser();
    $parser->Handler = new Doku_Handler();

    // register every mode with the parser
    foreach($modeList as $entry){
        $parser->addMode($entry['mode'],$entry['obj']);
    }

    // do the parsing
    trigger_event('PARSER_WIKITEXT_PREPROCESS', $text);
    return $parser->parse($text);
}
219
/**
 * returns the metadata of a page
 *
 * Unless rendering is disabled (or this function is already rendering further
 * up the call stack), the metadata of an existing page is re-rendered when its
 * 'metadata' renderer cache is not usable, and saved if it changed.
 *
 * @param string $id The id of the page the metadata should be returned from
 * @param string $key The key of the metadata value that shall be read (by default everything) - separate hierarchies by " " like "date created"; the key is split into at most two parts
 * @param boolean $render If the page should be rendered when the cache can't be used - default true
 * @return mixed The requested metadata fields, null if the key does not exist
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 */
function p_get_metadata($id, $key='', $render=true){
    global $ID;

    // prevent recursive calls in the cache
    static $recursion = false;

    // cache the current page
    // Benchmarking shows the current page's metadata is generally the only page metadata
    // accessed several times. This may catch a few other pages, but that shouldn't be an issue.
    $meta = p_read_metadata($id, ($ID == $id));

    if ($render && !$recursion){
        $recursion = true;

        $cachefile = new cache_renderer($id, wikiFN($id), 'metadata');

        if (page_exists($id) && !$cachefile->useCache()){
            $previous = $meta;
            $meta     = p_render_metadata($id, $previous);

            // only update the file when the metadata has been changed
            if ($meta == $previous || p_save_metadata($id, $meta)) {
                // store a timestamp in order to make sure that the cachefile is touched
                $cachefile->storeCache(time());
            } else {
                msg('Unable to save metadata file. Hint: disk full; file permissions; safe_mode setting.',-1);
            }
        }

        $recursion = false;
    }

    // walk down the 'current' metadata along the requested key path
    $val  = $meta['current'];
    $path = preg_split('/\s+/', $key, 2, PREG_SPLIT_NO_EMPTY);
    foreach($path as $cur_key) {
        if (!isset($val[$cur_key])) {
            return null;
        }
        $val = $val[$cur_key];
    }

    return $val;
}
273
/**
 * sets metadata elements of a page
 *
 * Entries of $data['relation'] are merged per subkey and the keys
 * 'description', 'date' and 'contributor' are merged as a whole; every other
 * key simply overwrites the stored value. Merging only happens when the
 * stored value is a non-empty array, otherwise the new value replaces it.
 * The metadata file is only written when something actually changed.
 *
 * @see http://www.dokuwiki.org/devel:metadata#functions_to_get_and_set_metadata
 *
 * @param String  $id         is the ID of a wiki page
 * @param Array   $data       is an array with key => value pairs to be set in the metadata
 * @param Boolean $render     whether or not the page metadata should be generated with the renderer
 * @param Boolean $persistent indicates whether or not the particular metadata value will persist through
 *                            the next metadata rendering.
 * @return boolean true on success, false if $data is not an array or saving failed
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 */
function p_set_metadata($id, $data, $render=false, $persistent=true){
    if (!is_array($data)) return false;

    global $ID;

    // cache the current page
    $cache = ($ID == $id);
    $orig = p_read_metadata($id, $cache);

    // render metadata first?
    $meta = $render ? p_render_metadata($id, $orig) : $orig;

    // the parts of the metadata array every value is written to
    $targets = $persistent ? array('current', 'persistent') : array('current');

    // now add the passed metadata
    $protected = array('description', 'date', 'contributor');
    foreach ($data as $key => $value){

        // be careful with sub-arrays of $meta['relation']
        if ($key == 'relation'){

            // relation data has to come as subkey => value pairs
            if (!is_array($value)) continue;

            foreach ($value as $subkey => $subvalue){
                foreach ($targets as $part){
                    $old = isset($meta[$part][$key][$subkey]) ? $meta[$part][$key][$subkey] : null;
                    // array_merge() needs arrays on both sides: a scalar stored
                    // value is replaced, a scalar new value is appended
                    $meta[$part][$key][$subkey] = (!empty($old) && is_array($old))
                        ? array_merge($old, (array) $subvalue)
                        : $subvalue;
                }
            }

        // be careful with some sensitive arrays of $meta
        } elseif (in_array($key, $protected)){

            // these keys, must have subkeys - a legitimate value must be an array
            if (!is_array($value)) continue;

            foreach ($targets as $part){
                $old = isset($meta[$part][$key]) ? $meta[$part][$key] : null;
                $meta[$part][$key] = (!empty($old) && is_array($old))
                    ? array_merge($old, $value)
                    : $value;
            }

        // no special treatment for the rest
        } else {
            foreach ($targets as $part){
                $meta[$part][$key] = $value;
            }
        }
    }

    // save only if metadata changed
    if ($meta == $orig) return true;

    return p_save_metadata($id, $meta);
}
337
/**
 * Purges the non-persistent part of the meta data
 * used on page deletion
 *
 * Every entry of the 'current' metadata is reset to an empty value of its own
 * kind: array() for arrays, '' for everything else. The 'persistent' part is
 * left alone and the result is written back with p_save_metadata().
 *
 * @param string $id page id
 * @return bool result of p_save_metadata()
 *
 * @author Michael Klier <chi@chimeric.de>
 */
function p_purge_metadata($id) {
    $meta = p_read_metadata($id);
    foreach($meta['current'] as $key => $value) {
        // decide on the entry itself; $meta[$key] is the wrong level and only
        // ever matches the top-level keys 'current' and 'persistent'
        $meta['current'][$key] = is_array($value) ? array() : '';
    }
    return p_save_metadata($id, $meta);
}
356
/**
 * read the metadata from source/cache for $id
 * (internal use only - called by p_get_metadata & p_set_metadata)
 *
 * A copy held in the global $cache_metadata is preferred. Otherwise the
 * page's .meta file is unserialized; a missing or unreadable (not
 * unserializable to an array) file yields an empty metadata structure.
 *
 * @author   Christopher Smith <chris@jalakai.co.uk>
 *
 * @param    string   $id      absolute wiki page id
 * @param    bool     $cache   whether or not to cache metadata in memory
 *                             (only use for metadata likely to be accessed several times)
 *
 * @return   array             metadata
 */
function p_read_metadata($id,$cache=false) {
    global $cache_metadata;

    $cacheKey = (string) $id;
    if (isset($cache_metadata[$cacheKey])) return $cache_metadata[$cacheKey];

    $empty = array('current'=>array(),'persistent'=>array());

    $file = metaFN($id, '.meta');
    $meta = @file_exists($file) ? unserialize(io_readFile($file, false)) : $empty;

    // unserialize() returns false on a damaged file; callers index the result
    // as an array, so fall back to the empty structure
    if (!is_array($meta)) {
        $meta = $empty;
    }

    if ($cache) {
        $cache_metadata[$cacheKey] = $meta;
    }

    return $meta;
}
383
/**
 * This is the backend function to save a metadata array to a file
 *
 * Before writing, an existing in-memory copy in $cache_metadata is replaced
 * and, if $id is the page described by $INFO, $INFO['meta'] is updated with
 * the 'current' metadata.
 *
 * @param    string   $id      absolute wiki page id
 * @param    array    $meta    metadata
 *
 * @return   bool              success / fail
 */
function p_save_metadata($id, $meta) {
    global $cache_metadata, $INFO;

    // sync the in-memory copy, but only if there already is one
    if (isset($cache_metadata[$id])) {
        $cache_metadata[$id] = $meta;
    }

    // sync the $INFO metadata when it belongs to this page
    $isInfoPage = !empty($INFO) && ($id == $INFO['id']);
    if ($isInfoPage) {
        $INFO['meta'] = $meta['current'];
    }

    $target = metaFN($id, '.meta');
    return io_saveFile($target, serialize($meta));
}
401
/**
 * renders the metadata of a page
 *
 * The rendering is wrapped in the PARSER_METADATA_RENDER event; the default
 * action runs the page's instructions through Doku_Renderer_metadata, seeded
 * with the given 'current' and 'persistent' metadata. The global $ID is set to
 * $id for the duration of the call.
 *
 * @param string $id   page id
 * @param array  $orig metadata with 'current' and 'persistent' parts
 * @return mixed the event result, or null if no instructions could be obtained
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 */
function p_render_metadata($id, $orig){
    // make sure the correct ID is in global ID
    global $ID;
    $keep = $ID;
    $ID   = $id;

    // add an extra key for the event - to tell event handlers the page whose metadata this is
    $orig['page'] = $id;
    $event = new Doku_Event('PARSER_METADATA_RENDER', $orig);

    if ($event->advise_before()) {

        require_once DOKU_INC."inc/parser/metadata.php";

        // get instructions
        $instructions = p_cached_instructions(wikiFN($id),false,$id);
        if($instructions === null){
            // something went wrong with the instructions
            $ID = $keep;
            return null;
        }

        // set up the renderer
        $metaRenderer = new Doku_Renderer_metadata();
        $metaRenderer->meta       = $orig['current'];
        $metaRenderer->persistent = $orig['persistent'];

        // execute each instruction's callback against the renderer
        foreach ($instructions as $instruction){
            $callback = array(&$metaRenderer, $instruction[0]);
            call_user_func_array($callback, (array) $instruction[1]);
        }

        $event->result = array(
            'current'    => $metaRenderer->meta,
            'persistent' => $metaRenderer->persistent
        );
    }
    $event->advise_after();

    $ID = $keep;
    return $event->result;
}
445
/**
 * returns all available parser syntax modes in correct order
 *
 * Each entry is an array with the keys 'sort', 'mode' and 'obj'. The list is
 * built once per request (kept in a static variable) from the syntax plugins,
 * the standard and formatting modes, the smiley/acronym/entity modes and,
 * depending on $conf, the typography and camelcase modes. It is sorted with
 * p_sort_modes().
 *
 * @return array list of mode entries
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_get_parsermodes(){
    global $conf;

    //reuse old data
    static $modes = null;
    if(!empty($modes)){
        return $modes;
    }

    //import parser classes and mode definitions
    require_once DOKU_INC . 'inc/parser/parser.php';

    // we now collect all syntax modes and their objects, then they will
    // be sorted and added to the parser in correct order
    $modes = array();

    // add syntax plugins
    $pluginlist = plugin_list('syntax');
    if(count($pluginlist)){
        global $PARSER_MODES;
        $syntax = null;
        foreach($pluginlist as $plugin){
            //attempt to load plugin into $syntax
            if(!$syntax =& plugin_load('syntax',$plugin)) continue;

            $modename = "plugin_$plugin";
            //register mode type
            $PARSER_MODES[$syntax->getType()][] = $modename;
            //add to modes
            $modes[] = array(
                'sort' => $syntax->getSort(),
                'mode' => $modename,
                'obj'  => $syntax,
            );
            //remove the reference
            unset($syntax);
        }
    }

    // add default modes
    $std_modes = array(
        'listblock', 'preformatted', 'notoc', 'nocache',
        'header', 'table', 'linebreak', 'footnote', 'hr',
        'unformatted', 'php', 'html', 'code', 'file', 'quote',
        'internallink', 'rss', 'media', 'externallink',
        'emaillink', 'windowssharelink', 'eol',
    );
    if($conf['typography']){
        $std_modes[] = 'quotes';
        $std_modes[] = 'multiplyentity';
    }
    foreach($std_modes as $name){
        $class   = "Doku_Parser_Mode_$name";
        $mode    = new $class();
        $modes[] = array('sort' => $mode->getSort(), 'mode' => $name, 'obj' => $mode);
    }

    // add formatting modes
    $fmt_modes = array(
        'strong', 'emphasis', 'underline', 'monospace',
        'subscript', 'superscript', 'deleted',
    );
    foreach($fmt_modes as $name){
        $mode    = new Doku_Parser_Mode_formatting($name);
        $modes[] = array('sort' => $mode->getSort(), 'mode' => $name, 'obj' => $mode);
    }

    // add modes which need files
    $smiley  = new Doku_Parser_Mode_smiley(array_keys(getSmileys()));
    $modes[] = array('sort' => $smiley->getSort(), 'mode' => 'smiley', 'obj' => $smiley);

    $acronym = new Doku_Parser_Mode_acronym(array_keys(getAcronyms()));
    $modes[] = array('sort' => $acronym->getSort(), 'mode' => 'acronym', 'obj' => $acronym);

    $entity  = new Doku_Parser_Mode_entity(array_keys(getEntities()));
    $modes[] = array('sort' => $entity->getSort(), 'mode' => 'entity', 'obj' => $entity);

    // add optional camelcase mode
    if($conf['camelcase']){
        $camelcase = new Doku_Parser_Mode_camelcaselink();
        $modes[]   = array('sort' => $camelcase->getSort(), 'mode' => 'camelcaselink', 'obj' => $camelcase);
    }

    //sort modes
    usort($modes,'p_sort_modes');

    return $modes;
}
536
/**
 * Callback function for usort
 *
 * Orders two mode entries ascending by their 'sort' value.
 *
 * @param array $a mode entry with a 'sort' key
 * @param array $b mode entry with a 'sort' key
 * @return int 0 if both sort values are equal, -1 if $a sorts first, 1 otherwise
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_sort_modes($a, $b){
    $left  = $a['sort'];
    $right = $b['sort'];

    if($left == $right){
        return 0;
    }elseif($left < $right){
        return -1;
    }
    return 1;
}
546
/**
 * Renders a list of instruction to the specified output mode
 *
 * In the $info array is information from the renderer returned
 *
 * After rendering, the RENDERER_CONTENT_POSTPROCESS event is triggered with
 * the mode and a reference to the renderer's document.
 *
 * @param string $mode         output mode, resolved to a renderer by p_get_renderer()
 * @param array  $instructions list of instructions, each array(method name, arguments)
 * @param array  $info         reference that receives the renderer's info array;
 *                             left untouched when nothing is rendered
 * @return string|null the rendered document, '' if $instructions is null,
 *                     null if no renderer could be obtained
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_render($mode,$instructions,&$info){
    if(is_null($instructions)) return '';

    $Renderer =& p_get_renderer($mode);
    if (is_null($Renderer)) return null;

    $Renderer->reset();

    $Renderer->smileys = getSmileys();
    $Renderer->entities = getEntities();
    $Renderer->acronyms = getAcronyms();
    $Renderer->interwiki = getInterwiki();

    // Loop through the instructions
    foreach ( $instructions as $instruction ) {
        // call_user_func_array() insists on an array, so instructions without
        // arguments are normalised the same way p_render_metadata() does it
        $args = isset($instruction[1]) ? (array) $instruction[1] : array();

        // Execute the callback against the Renderer
        call_user_func_array(array(&$Renderer, $instruction[0]), $args);
    }

    //set info array
    $info = $Renderer->info;

    // Post process and return the output
    $data = array($mode,& $Renderer->doc);
    trigger_event('RENDERER_CONTENT_POSTPROCESS',$data);
    return $Renderer->doc;
}
582
/**
 * Loads the renderer object for the given output mode
 *
 * The renderer name is $conf['renderer_<mode>'] when that is set and non-empty,
 * otherwise the mode itself. A file inc/parser/<name>.php providing the class
 * Doku_Renderer_<name> is tried first; failing that, the name is looked up as a
 * renderer plugin through the plugin controller. Failures are reported with msg().
 *
 * @param string $mode output mode
 * @return object|null the renderer (returned by reference), null if none could be loaded
 */
function & p_get_renderer($mode) {
    global $conf, $plugin_controller;

    // A function returning by reference has to return a variable, so every
    // exit path returns $Renderer - which stays null on failure.
    $Renderer = null;

    $rname = !empty($conf['renderer_'.$mode]) ? $conf['renderer_'.$mode] : $mode;

    // try default renderer first:
    $file = DOKU_INC."inc/parser/$rname.php";
    if(@file_exists($file)){
        require_once $file;
        $rclass = "Doku_Renderer_$rname";

        if ( !class_exists($rclass) ) {
            trigger_error("Unable to resolve render class $rclass",E_USER_WARNING);
            msg("Renderer '$rname' for $mode not valid",-1);
            return $Renderer;
        }
        $Renderer = new $rclass();
    }else{
        // Maybe a plugin/component is available?
        list($plugin) = $plugin_controller->_splitName($rname);
        if (!$plugin_controller->isdisabled($plugin)){
            $Renderer =& $plugin_controller->load('renderer',$rname);
        }

        // still null if the plugin is disabled or could not be loaded
        if(is_null($Renderer)){
            msg("No renderer '$rname' found for mode '$mode'",-1);
        }
    }

    return $Renderer;
}
615
/**
 * Gets the first heading from a file
 *
 * This is the 'title' entry of the page's metadata as returned by
 * p_get_metadata().
 *
 * @param   string   $id       dokuwiki page id
 * @param   bool     $render   rerender if first heading not known
 *                             default: true  -- must be set to false for calls from the metadata renderer to
 *                                               protect against loops and excessive resource usage when pages
 *                                               for which only a first heading is required will attempt to
 *                                               render metadata for all the pages for which they require first
 *                                               headings ... and so on.
 * @return  mixed    the stored title, null if the metadata has none
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_get_first_heading($id, $render=true){
    $heading = p_get_metadata($id,'title',$render);
    return $heading;
}
632
/**
 * Wrapper for GeSHi Code Highlighter, provides caching of its output
 *
 * The highlighted code is cached in a file keyed on language and code. The
 * cache is used unless a purge was requested or it is not newer than
 * inc/geshi.php, the language definition file and the first main default
 * config file.
 *
 * @param  string   $code       source code to be highlighted
 * @param  string   $language   language to provide highlighting
 * @param  string   $wrapper    html element to wrap the returned highlighted text,
 *                              an empty value returns the bare highlighted code
 * @return string   the highlighted code
 *
 * @author Christopher Smith <chris@jalakai.co.uk>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_xhtml_cached_geshi($code, $language, $wrapper='pre') {
    global $conf, $config_cascade;
    $language = strtolower($language);

    // remove any leading or trailing blank lines
    $code = preg_replace('/^\s*?\n|\s*?\n$/','',$code);

    $cache = getCacheName($language.$code,".code");
    $ctime = @filemtime($cache);
    // empty() rather than a plain read: 'purge' is usually not part of the request
    if($ctime && empty($_REQUEST['purge']) &&
            $ctime > filemtime(DOKU_INC.'inc/geshi.php') &&                 // geshi changed
            $ctime > @filemtime(DOKU_INC.'inc/geshi/'.$language.'.php') &&  // language syntax definition changed
            $ctime > filemtime(reset($config_cascade['main']['default']))){ // dokuwiki changed
        $highlighted_code = io_readFile($cache, false);

    } else {

        $geshi = new GeSHi($code, $language, DOKU_INC . 'inc/geshi');
        $geshi->set_encoding('utf-8');
        $geshi->enable_classes();
        $geshi->set_header_type(GESHI_HEADER_PRE);
        $geshi->set_link_target($conf['target']['extern']);

        // remove GeSHi's wrapper element (we'll replace it with our own later)
        // we need to use a GeSHi wrapper to avoid <BR> throughout the highlighted text
        $highlighted_code = trim(preg_replace('!^<pre[^>]*>|</pre>$!','',$geshi->parse_code()),"\n\r");
        io_saveFile($cache,$highlighted_code);
    }

    // add a wrapper element if required
    if ($wrapper) {
        // the language name ends up in an attribute value, so escape it;
        // ordinary language names pass through unchanged
        $class = htmlspecialchars($language, ENT_QUOTES, 'UTF-8');
        return "<$wrapper class=\"code $class\">$highlighted_code</$wrapper>";
    } else {
        return $highlighted_code;
    }
}
679
680