xref: /dokuwiki/inc/parserutils.php (revision 3bfb17c905aee9d0b5b237e017631271c4b53608)
1<?php
2/**
3 * Utilities for accessing the parser
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Harry Fuecks <hfuecks@gmail.com>
7 * @author     Andreas Gohr <andi@splitbrain.org>
8 */
9
// Stop immediately when DOKU_INC has not been defined by the including code.
if (!defined('DOKU_INC')) {
    die('meh.');
}
11
/**
 * Number of distinct pages whose first heading is read from the page
 * metadata. Once more pages than this have been requested, the title index
 * is loaded and used to answer all further requests.
 *
 * The value is only defined here when nothing defined it earlier.
 */
if (!defined('P_GET_FIRST_HEADING_METADATA_LIMIT')) {
    define('P_GET_FIRST_HEADING_METADATA_LIMIT', 10);
}
18
/**
 * Renders a wiki page (optionally one specific revision) to XHTML.
 *
 * Old revisions are parsed and rendered on every call; the current revision
 * goes through p_cached_output(). When the page file does not exist and
 * $excuse is true, the localized 'norev' (revision given) or 'newpage'
 * (no revision) text is returned instead.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id     page id
 * @param string $rev    revision to render, empty for the current one
 * @param bool   $excuse return an explanation when the file is missing
 * @return string|null rendered output; '' when the file is missing and no
 *                     excuse was requested
 */
function p_wiki_xhtml($id, $rev='', $excuse=true){
    global $ID;

    $file = wikiFN($id,$rev);

    // parsing needs the page id in the global $ID, so swap it in for now
    $keep = $ID;
    $ID   = $id;

    $ret = '';
    if(@file_exists($file)){
        if($rev){
            // old revisions are never served from the output cache
            $text = io_readWikiPage($file,$id,$rev);
            $ret  = p_render('xhtml',p_get_instructions($text),$info);
        }else{
            $ret = p_cached_output($file,'xhtml',$id);
        }
    }elseif($excuse){
        $ret = p_locale_xhtml($rev ? 'norev' : 'newpage');
    }

    // put the previous id back
    $ID = $keep;

    return $ret;
}
55
/**
 * Returns the starting summary of a page (e.g. the first few paragraphs),
 * rendered with the 'xhtmlsummary' renderer.
 *
 * When the page file does not exist and $excuse is true, the localized
 * 'norev' / 'newpage' text is returned and $title is left untouched.
 * When the file does not exist and $excuse is false, nothing is rendered
 * and $title falls back to the page id.
 *
 * @deprecated
 * @author Harry Fuecks <hfuecks@gmail.com>
 *
 * @param string $id     wiki page id
 * @param string $title  (reference) receives the page title taken from the
 *                       renderer info, or the page id when none is available
 * @param string $rev    revision to summarize, empty for the current one
 * @param bool   $excuse return an explanation when the file is missing
 * @return string|null the rendered summary or the explanation text
 */
function p_wiki_xhtml_summary($id, &$title, $rev='', $excuse=true){
    global $ID;

    $file = wikiFN($id,$rev);

    // both stay at these defaults when there is no file and no excuse is wanted,
    // so the code below never touches an undefined variable
    $ins  = null;
    $info = array();

    //ensure $id is in global $ID (needed for parsing)
    $keep = $ID;
    $ID   = $id;

    if(@file_exists($file)){
        if($rev){
            //no caching on old revisions
            $ins = p_get_instructions(io_readWikiPage($file,$id,$rev));
        }else{
            // the XHTML for a summary is not cached so use the instruction cache
            $ins = p_cached_instructions($file);
        }
    }elseif($excuse){
        $ret = p_locale_xhtml($rev ? 'norev' : 'newpage');
        //restore ID (just in case)
        $ID = $keep;
        return $ret;
    }

    // p_render() returns '' without filling $info when $ins is null
    $ret = p_render('xhtmlsummary',$ins,$info);

    $title = !empty($info['sum_pagetitle']) ? $info['sum_pagetitle'] : $id;

    $ID = $keep;
    return $ret;
}
112
/**
 * Returns the localized text with the given id in its parsed form.
 *
 * The file is resolved through localeFN() and rendered via p_cached_output()
 * with that function's default output format.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id id of the locale text
 * @return string|null the parsed text
 */
function p_locale_xhtml($id){
    return p_cached_output(localeFN($id));
}
123
/**
 *     *** DEPRECATED ***
 *
 * Use p_cached_output() instead; this function only forwards to it with
 * the default output format.
 *
 * @deprecated
 * @author Andreas Gohr <andi@splitbrain.org>
 * @todo   rewrite to use mode instead of hardcoded XHTML
 *
 * @param string $file path of the file to parse
 * @return string|null the parsed output
 */
function p_cached_xhtml($file){
    $xhtml = p_cached_output($file);
    return $xhtml;
}
140
/**
 * Returns the given file parsed into the requested output format.
 *
 * A valid renderer cache is used when available. Otherwise the file is
 * rendered from its (cached) instructions and the result is stored in the
 * cache, unless the renderer info says caching is not allowed, in which
 * case the cache file is removed.
 *
 * With $conf['allowdebug'] enabled and the 'xhtml' format, an HTML comment
 * describing the cache usage is appended to the output.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string $file   path of the file to parse
 * @param string $format output format (renderer mode)
 * @param string $id     page id, passed on to the caches
 * @return string|null the rendered output
 */
function p_cached_output($file, $format='xhtml', $id='') {
    global $conf;

    // the debug note is an HTML comment, so it is only added to XHTML output
    $debug = !empty($conf['allowdebug']) && $format=='xhtml';
    $note  = '';

    $cache = new cache_renderer($id, $file, $format);
    if ($cache->useCache()) {
        $parsed = $cache->retrieveCache(false);
        $note   = "cachefile {$cache->cache} used";
    } else {
        // p_render() leaves $info untouched on its early returns
        // (no instructions / no renderer), so give it a defined start value
        $info   = array();
        $parsed = p_render($format, p_cached_instructions($file,false,$id), $info);

        if (!empty($info['cache'])) {
            $cache->storeCache($parsed);               //save cachefile
            $note = "no cachefile used, but created {$cache->cache}";
        }else{
            $cache->removeCache();                     //try to delete cachefile
            $note = "no cachefile used, caching forbidden";
        }
    }

    if ($debug) $parsed .= "\n<!-- $note -->\n";

    return $parsed;
}
168
/**
 * Returns the render instructions for a file.
 *
 * The instruction cache is returned when $cacheonly is set, when the cache
 * is usable, or when the instructions for this file were already rebuilt
 * and stored earlier in the same run. Otherwise the file is parsed and the
 * result is written to the cache.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $file      path of the file to parse
 * @param bool   $cacheonly return the cache content without rebuilding
 * @param string $id        page id, passed on to the cache and the reader
 * @return mixed the instructions (or whatever the cache returns); null when
 *               no cache was used and the file does not exist
 */
function p_cached_instructions($file,$cacheonly=false,$id='') {
    global $conf;
    // files whose instructions were rebuilt and stored during this run
    static $run = array();

    $cache = new cache_instructions($id, $file);

    if ($cacheonly || $cache->useCache() || isset($run[$file])) {
        return $cache->retrieveCache();
    }

    if (!@file_exists($file)) {
        return null;
    }

    // no cache - do some work
    $ins = p_get_instructions(io_readWikiPage($file,$id));
    if (!$cache->storeCache($ins)) {
        msg('Unable to save cache file. Hint: disk full; file permissions; safe_mode setting.',-1);
    } else {
        // we won't rebuild these instructions in the same run again
        $run[$file] = true;
    }
    return $ins;
}
198
/**
 * Turns wiki text into a list of render instructions.
 *
 * All syntax modes from p_get_parsermodes() are registered with a fresh
 * parser. The PARSER_WIKITEXT_PREPROCESS event is triggered with the text
 * right before parsing.
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $text raw wiki text
 * @return mixed whatever Doku_Parser::parse() returns for the text
 */
function p_get_instructions($text){
    // fetched before the parser is created; this call also includes
    // inc/parser/parser.php
    $modes = p_get_parsermodes();

    // parser with its handler
    $Parser = new Doku_Parser();
    $Parser->Handler = new Doku_Handler();

    // register every mode, in the order delivered by p_get_parsermodes()
    foreach($modes as $entry){
        $Parser->addMode($entry['mode'],$entry['obj']);
    }

    // the event is triggered with the text before it is parsed
    trigger_event('PARSER_WIKITEXT_PREPROCESS', $text);
    return $Parser->parse($text);
}
226
/**
 * Returns the metadata of a page.
 *
 * When rendering is allowed and the metadata renderer cache of an existing
 * page cannot be used, the metadata is rendered again and saved if it
 * changed. A render that yields no array (p_render_metadata() returned
 * null) is ignored, so the stored metadata is neither replaced nor
 * overwritten by it.
 *
 * @param string $id The id of the page the metadata should be returned from
 * @param string $key The key of the metadata value that shall be read (by default everything) - separate hierarchies by " " like "date created"
 * @param boolean $render If the page should be rendered when the cache can't be used - default true
 * @return mixed The requested metadata fields, null when the key does not exist
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 */
function p_get_metadata($id, $key='', $render=true){
    global $ID;

    // cache the current page
    // Benchmarking shows the current page's metadata is generally the only page metadata
    // accessed several times. This may catch a few other pages, but that shouldn't be an issue.
    $cache = ($ID == $id);
    $meta = p_read_metadata($id, $cache);

    // prevent recursive calls in the cache
    static $recursion = false;
    if (!$recursion && $render){
        $recursion = true;

        $cachefile = new cache_renderer($id, wikiFN($id), 'metadata');

        if (page_exists($id) && !$cachefile->useCache()){
            $rendered = p_render_metadata($id, $meta);

            // a non-array result means nothing was rendered; keep what was read
            if (is_array($rendered)) {
                $changed = ($rendered != $meta);
                $meta    = $rendered;

                // only update the file when the metadata has been changed
                if (!$changed || p_save_metadata($id, $meta)) {
                    // store a timestamp in order to make sure that the cachefile is touched
                    $cachefile->storeCache(time());
                } else {
                    msg('Unable to save metadata file. Hint: disk full; file permissions; safe_mode setting.',-1);
                }
            }
        }

        $recursion = false;
    }

    $val = isset($meta['current']) ? $meta['current'] : null;

    // filter by $key (split into at most two parts: key and subkey)
    foreach(preg_split('/\s+/', $key, 2, PREG_SPLIT_NO_EMPTY) as $cur_key) {
        if (!isset($val[$cur_key])) {
            return null;
        }
        $val = $val[$cur_key];
    }
    return $val;
}
280
/**
 * Sets metadata elements of a page.
 *
 * Values are written to the 'current' part and, when $persistent is true,
 * also to the 'persistent' part:
 *  - 'relation': each subkey is merged with an existing array value
 *  - 'description', 'date', 'contributor': only array values are accepted
 *    and merged with an existing array value
 *  - everything else simply replaces the stored value
 * Whenever the old or the new value is not an array, the new value replaces
 * the old one instead of being merged.
 *
 * @see http://www.dokuwiki.org/devel:metadata#functions_to_get_and_set_metadata
 *
 * @param String  $id         is the ID of a wiki page
 * @param Array   $data       is an array with key => value pairs to be set in the metadata
 * @param Boolean $render     whether or not the page metadata should be generated with the renderer
 * @param Boolean $persistent indicates whether or not the particular metadata value will persist through
 *                            the next metadata rendering.
 * @return boolean true on success (also when nothing had to be saved),
 *                 false when $data is not an array
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 */
function p_set_metadata($id, $data, $render=false, $persistent=true){
    if (!is_array($data)) return false;

    global $ID;

    // cache the current page
    $cache = ($ID == $id);
    $orig = p_read_metadata($id, $cache);

    // render metadata first? A render without an array result is ignored so
    // the metadata read above is not thrown away.
    $meta = $orig;
    if ($render) {
        $rendered = p_render_metadata($id, $orig);
        if (is_array($rendered)) $meta = $rendered;
    }

    // the parts of the metadata that receive the new values
    $parts = $persistent ? array('current', 'persistent') : array('current');

    // now add the passed metadata
    $protected = array('description', 'date', 'contributor');
    foreach ($data as $key => $value){

        // be careful with sub-arrays of $meta['relation']
        // (strict comparison: an integer key must not match by type juggling)
        if ($key === 'relation'){
            if (!is_array($value)) continue;

            foreach ($value as $subkey => $subvalue){
                foreach ($parts as $part){
                    if (!isset($meta[$part][$key]) || !is_array($meta[$part][$key])) {
                        $meta[$part][$key] = array();
                    }
                    $old = isset($meta[$part][$key][$subkey]) ? $meta[$part][$key][$subkey] : null;
                    // NOTE: array_merge() renumbers integer keys
                    if (!empty($old) && is_array($old) && is_array($subvalue)) {
                        $meta[$part][$key][$subkey] = array_merge($old, $subvalue);
                    } else {
                        $meta[$part][$key][$subkey] = $subvalue;
                    }
                }
            }

        // be careful with some sensitive arrays of $meta
        } elseif (in_array($key, $protected, true)){

            // these keys must have subkeys - a legitimate value must be an array
            if (is_array($value)) {
                foreach ($parts as $part){
                    $old = isset($meta[$part][$key]) ? $meta[$part][$key] : null;
                    if (!empty($old) && is_array($old)) {
                        $meta[$part][$key] = array_merge($old, $value);
                    } else {
                        $meta[$part][$key] = $value;
                    }
                }
            }

        // no special treatment for the rest
        } else {
            foreach ($parts as $part){
                $meta[$part][$key] = $value;
            }
        }
    }

    // save only if metadata changed
    if ($meta == $orig) return true;

    return p_save_metadata($id, $meta);
}
344
/**
 * Purges the non-persistent part of the metadata
 * (used on page deletion).
 *
 * Every entry below 'current' is reset to an empty value of its own kind:
 * arrays become empty arrays, everything else becomes an empty string.
 * The 'persistent' part is saved back unchanged.
 *
 * @author Michael Klier <chi@chimeric.de>
 *
 * @param string $id page id
 * @return bool result of p_save_metadata()
 */
function p_purge_metadata($id) {
    $meta = p_read_metadata($id);

    if (isset($meta['current']) && is_array($meta['current'])) {
        foreach ($meta['current'] as $key => $value) {
            // look at the entry itself: $meta[$key] is not where the value lives
            $meta['current'][$key] = is_array($value) ? array() : '';
        }
    }

    return p_save_metadata($id, $meta);
}
363
/**
 * Reads the metadata for $id from the in-memory cache or from its .meta file
 * (internal use only - called by p_get_metadata & p_set_metadata).
 *
 * A missing file, or a file whose content does not unserialize to an array,
 * yields the empty structure array('current'=>array(),'persistent'=>array()).
 *
 * @author   Christopher Smith <chris@jalakai.co.uk>
 *
 * @param    string   $id      absolute wiki page id
 * @param    bool     $cache   whether or not to cache metadata in memory
 *                             (only use for metadata likely to be accessed several times)
 *
 * @return   array             metadata
 */
function p_read_metadata($id,$cache=false) {
    global $cache_metadata;

    $cachekey = (string)$id;
    if (isset($cache_metadata[$cachekey])) return $cache_metadata[$cachekey];

    $meta = array('current'=>array(),'persistent'=>array());

    $file = metaFN($id, '.meta');
    if (@file_exists($file)) {
        $stored = unserialize(io_readFile($file, false));
        // unserialize() returns false for unreadable content; never hand that
        // to callers that index the result as an array
        if (is_array($stored)) $meta = $stored;
    }

    if ($cache) {
        $cache_metadata[$cachekey] = $meta;
    }

    return $meta;
}
390
/**
 * Backend function that writes a metadata array to the page's .meta file.
 *
 * Before writing, the copies held in memory are brought in line: the entry
 * in $cache_metadata (only when one exists for this id) and $INFO['meta']
 * (only when $INFO belongs to this id).
 *
 * @param    string   $id      absolute wiki page id
 * @param    array    $meta    metadata
 *
 * @return   bool              success / fail
 */
function p_save_metadata($id, $meta) {
    global $cache_metadata, $INFO;

    // refresh the cached copy, but never create a new cache entry here
    if (isset($cache_metadata[$id])) {
        $cache_metadata[$id] = $meta;
    }

    // keep the metadata shown in $INFO in step with what gets saved
    $infoIsThisPage = !empty($INFO) && ($id == $INFO['id']);
    if ($infoIsThisPage) {
        $INFO['meta'] = $meta['current'];
    }

    $file = metaFN($id, '.meta');
    return io_saveFile($file, serialize($meta));
}
408
/**
 * Renders the metadata of a page.
 *
 * The work is wrapped in the PARSER_METADATA_RENDER event. The default
 * rendering only runs when advise_before() returns true; it replays the
 * page's instructions against a Doku_Renderer_metadata that starts from the
 * given 'current' and 'persistent' values.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 *
 * @param string $id   page id
 * @param array  $orig metadata to start from ('current' and 'persistent')
 * @return mixed the event result; null when no instructions could be obtained
 */
function p_render_metadata($id, $orig){
    // the global $ID has to hold the page being rendered
    global $ID;
    $keep = $ID;
    $ID   = $id;

    // extra key that tells event handlers which page this metadata belongs to
    $orig['page'] = $id;
    $evt = new Doku_Event('PARSER_METADATA_RENDER', $orig);
    $proceed = $evt->advise_before();
    if ($proceed) {

        require_once DOKU_INC."inc/parser/metadata.php";

        // fetch the instructions; without them there is nothing to render
        $instructions = p_cached_instructions(wikiFN($id),false,$id);
        if($instructions === null){
            $ID = $keep;
            return null;
        }

        // renderer starts out with the metadata that was handed in
        $renderer = new Doku_Renderer_metadata();
        $renderer->meta = $orig['current'];
        $renderer->persistent = $orig['persistent'];

        // replay every instruction as a method call on the renderer
        foreach ($instructions as $instruction){
            $callback = array(&$renderer, $instruction[0]);
            call_user_func_array($callback, (array) $instruction[1]);
        }

        $evt->result = array(
            'current'    => $renderer->meta,
            'persistent' => $renderer->persistent
        );
    }
    $evt->advise_after();

    $ID = $keep;
    return $evt->result;
}
452
/**
 * Returns all available parser syntax modes, sorted with p_sort_modes().
 *
 * Each entry is an array with the keys 'sort', 'mode' and 'obj'. The list is
 * built once and then kept in a static variable for later calls.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @return array list of mode descriptions
 */
function p_get_parsermodes(){
    global $conf;

    // hand out the list built by an earlier call
    static $modes = null;
    if($modes != null){
        return $modes;
    }

    // parser classes and mode definitions
    require_once DOKU_INC . 'inc/parser/parser.php';

    // everything is collected first and sorted at the very end
    $modes = array();

    // 1) syntax plugins
    $pluginlist = plugin_list('syntax');
    if(count($pluginlist)){
        global $PARSER_MODES;
        $obj = null;
        foreach($pluginlist as $p){
            // skip plugins that cannot be loaded
            if(!$obj =& plugin_load('syntax',$p)) continue;

            $pluginMode = "plugin_$p";
            $PARSER_MODES[$obj->getType()][] = $pluginMode; //register mode type
            $modes[] = array(
                'sort' => $obj->getSort(),
                'mode' => $pluginMode,
                'obj'  => $obj,
            );
            unset($obj); //remove the reference
        }
    }

    // 2) built-in modes, each backed by its own Doku_Parser_Mode_* class
    $std_modes = array(
        'listblock','preformatted','notoc','nocache',
        'header','table','linebreak','footnote','hr',
        'unformatted','php','html','code','file','quote',
        'internallink','rss','media','externallink',
        'emaillink','windowssharelink','eol'
    );
    if($conf['typography']){
        array_push($std_modes, 'quotes', 'multiplyentity');
    }
    foreach($std_modes as $name){
        $class   = "Doku_Parser_Mode_$name";
        $modeObj = new $class();
        $modes[] = array(
            'sort' => $modeObj->getSort(),
            'mode' => $name,
            'obj'  => $modeObj
        );
    }

    // 3) formatting modes share a single class
    $fmt_modes = array(
        'strong','emphasis','underline','monospace',
        'subscript','superscript','deleted'
    );
    foreach($fmt_modes as $name){
        $modeObj = new Doku_Parser_Mode_formatting($name);
        $modes[] = array(
            'sort' => $modeObj->getSort(),
            'mode' => $name,
            'obj'  => $modeObj
        );
    }

    // 4) modes that are configured from files
    $smiley  = new Doku_Parser_Mode_smiley(array_keys(getSmileys()));
    $modes[] = array('sort' => $smiley->getSort(), 'mode' => 'smiley', 'obj' => $smiley);
    $acronym = new Doku_Parser_Mode_acronym(array_keys(getAcronyms()));
    $modes[] = array('sort' => $acronym->getSort(), 'mode' => 'acronym', 'obj' => $acronym);
    $entity  = new Doku_Parser_Mode_entity(array_keys(getEntities()));
    $modes[] = array('sort' => $entity->getSort(), 'mode' => 'entity', 'obj' => $entity);

    // 5) optional camelcase links
    if($conf['camelcase']){
        $camelcase = new Doku_Parser_Mode_camelcaselink();
        $modes[]   = array('sort' => $camelcase->getSort(), 'mode' => 'camelcaselink', 'obj' => $camelcase);
    }

    // bring the modes into their final order
    usort($modes,'p_sort_modes');

    return $modes;
}
543
/**
 * Comparison callback for usort(): orders two mode entries by their
 * 'sort' value, ascending.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $a first entry, must have a 'sort' key
 * @param array $b second entry, must have a 'sort' key
 * @return int -1, 0 or 1
 */
function p_sort_modes($a, $b){
    $left  = $a['sort'];
    $right = $b['sort'];

    if($left != $right){
        return ($left < $right) ? -1 : 1;
    }
    return 0;
}
553
/**
 * Renders a list of instructions to the specified output mode.
 *
 * Each instruction is replayed as a method call on the renderer: element 0
 * is the method name, element 1 its arguments. After rendering, the
 * RENDERER_CONTENT_POSTPROCESS event is triggered with the mode and a
 * reference to the rendered document.
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $mode         output mode, resolved by p_get_renderer()
 * @param array  $instructions instruction list; null renders nothing
 * @param array  $info         (reference) receives the renderer's info array;
 *                             left untouched on the two early returns
 * @return string|null the rendered document, '' when $instructions is null,
 *                     null when no renderer is available for $mode
 */
function p_render($mode,$instructions,&$info){
    if(is_null($instructions)) return '';

    $Renderer =& p_get_renderer($mode);
    if (is_null($Renderer)) return null;

    $Renderer->reset();

    $Renderer->smileys = getSmileys();
    $Renderer->entities = getEntities();
    $Renderer->acronyms = getAcronyms();
    $Renderer->interwiki = getInterwiki();

    // Loop through the instructions
    foreach ( $instructions as $instruction ) {
        // normalise the arguments the same way p_render_metadata() does, so a
        // missing or scalar argument entry cannot break call_user_func_array()
        $args = isset($instruction[1]) ? (array) $instruction[1] : array();

        // Execute the callback against the Renderer
        call_user_func_array(array(&$Renderer, $instruction[0]), $args);
    }

    //set info array
    $info = $Renderer->info;

    // Post process and return the output; the document is passed by
    // reference so the event data refers to the renderer's own doc
    $data = array($mode,& $Renderer->doc);
    trigger_event('RENDERER_CONTENT_POSTPROCESS',$data);
    return $Renderer->doc;
}
589
/**
 * Creates the renderer object for an output mode.
 *
 * The renderer name is $conf['renderer_<mode>'] when that setting is
 * non-empty, otherwise the mode itself. A renderer file below inc/parser/
 * is tried first; if none exists the name is looked up as a renderer
 * plugin through the plugin controller.
 *
 * The function returns by reference, so every exit path returns the
 * $Renderer variable rather than a literal value.
 *
 * @param string $mode output mode
 * @return object|null the renderer, or null (after a msg()) when none is found
 */
function & p_get_renderer($mode) {
    global $conf, $plugin_controller;

    $Renderer = null;

    $rname = !empty($conf['renderer_'.$mode]) ? $conf['renderer_'.$mode] : $mode;

    // try default renderer first:
    $file = DOKU_INC."inc/parser/$rname.php";
    if(@file_exists($file)){
        require_once $file;
        $rclass = "Doku_Renderer_$rname";

        if ( !class_exists($rclass) ) {
            trigger_error("Unable to resolve render class $rclass",E_USER_WARNING);
            msg("Renderer '$rname' for $mode not valid",-1);
            return $Renderer; // still null here
        }
        $Renderer = new $rclass();
    }else{
        // Maybe a plugin/component is available?
        list($plugin) = $plugin_controller->_splitName($rname);
        if (!$plugin_controller->isdisabled($plugin)){
            $Renderer =& $plugin_controller->load('renderer',$rname);
        }

        if(is_null($Renderer)){
            msg("No renderer '$rname' found for mode '$mode'",-1);
        }
    }

    return $Renderer;
}
622
/**
 * Gets the first heading (title) of a page.
 *
 * The first P_GET_FIRST_HEADING_METADATA_LIMIT distinct requests are
 * answered from the page metadata and remembered per id. After that the
 * page and title indexes are loaded once and used for all ids that have
 * not been remembered.
 *
 * @param   string   $id       dokuwiki page id
 * @param   bool     $render   rerender if first heading not known
 *                             default: true  -- must be set to false for calls from the metadata renderer to
 *                                               protect against loops and excessive resource usage when pages
 *                                               for which only a first heading is required will attempt to
 *                                               render metadata for all the pages for which they require first
 *                                               headings ... and so on.
 * @return  mixed    the title; null when the id is not found in the title index
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Michael Hamann <michael@content-space.de>
 */
function p_get_first_heading($id, $render=true){
    // needed for the index directory below
    global $conf;

    // counter how many titles have been requested using p_get_metadata
    static $count = 1;
    // the index of all titles, only loaded when many titles are requested
    static $title_index = null;
    // cache for titles requested using p_get_metadata
    static $title_cache = array();

    $id = cleanID($id);

    // check if this title has already been requested
    if (isset($title_cache[$id]))
        return $title_cache[$id];

    // check if already too many titles have been requested and probably
    // using the title index is better
    if ($count > P_GET_FIRST_HEADING_METADATA_LIMIT) {
        if (is_null($title_index)) {
            $pages  = array_map('rtrim', idx_getIndex('page', ''));
            $titles = array_map('rtrim', idx_getIndex('title', ''));
            $total  = count($pages);

            // check for corrupt title index #FS2076
            if($total != count($titles)){
                $titles = $total ? array_fill(0,$total,'') : array();
                // original note: the index will be rebuilt in inc/init.php
                @unlink($conf['indexdir'].'/title.idx');
            }

            // array_fill()/array_combine() are not reliable for empty input
            // on older PHP versions, so handle an empty page index directly
            $title_index = $total ? array_combine($pages, $titles) : array();
        }
        return isset($title_index[$id]) ? $title_index[$id] : null;
    }

    ++$count;
    $title_cache[$id] = p_get_metadata($id,'title',$render);
    return $title_cache[$id];
}
671
/**
 * Wrapper for GeSHi Code Highlighter, provides caching of its output.
 *
 * The cached result is used when the cache file exists, no 'purge' request
 * parameter is set and the cache file is newer than inc/geshi.php, the
 * language definition file and the first default main configuration file.
 * Otherwise the code is highlighted again and the result written to the
 * cache.
 *
 * @param  string   $code       source code to be highlighted
 * @param  string   $language   language to provide highlighting
 * @param  string   $wrapper    html element to wrap the returned highlighted text,
 *                              an empty value returns the bare highlighted code
 * @return string   the highlighted code
 *
 * @author Christopher Smith <chris@jalakai.co.uk>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_xhtml_cached_geshi($code, $language, $wrapper='pre') {
    global $conf, $config_cascade;
    $language = strtolower($language);

    // remove any leading or trailing blank lines
    $code = preg_replace('/^\s*?\n|\s*?\n$/','',$code);

    $cache = getCacheName($language.$code,".code");
    $ctime = @filemtime($cache);
    // empty() instead of a bare read: the parameter is usually not set at all
    if($ctime && empty($_REQUEST['purge']) &&
            $ctime > filemtime(DOKU_INC.'inc/geshi.php') &&                 // geshi changed
            $ctime > @filemtime(DOKU_INC.'inc/geshi/'.$language.'.php') &&  // language syntax definition changed
            $ctime > filemtime(reset($config_cascade['main']['default']))){ // dokuwiki changed
        $highlighted_code = io_readFile($cache, false);

    } else {

        $geshi = new GeSHi($code, $language, DOKU_INC . 'inc/geshi');
        $geshi->set_encoding('utf-8');
        $geshi->enable_classes();
        $geshi->set_header_type(GESHI_HEADER_PRE);
        $geshi->set_link_target($conf['target']['extern']);

        // remove GeSHi's wrapper element (we'll replace it with our own later)
        // we need to use a GeSHi wrapper to avoid <BR> throughout the highlighted text
        $highlighted_code = trim(preg_replace('!^<pre[^>]*>|</pre>$!','',$geshi->parse_code()),"\n\r");
        io_saveFile($cache,$highlighted_code);
    }

    // add a wrapper element if required
    // NOTE(review): $wrapper and $language go into the markup unescaped -
    // confirm that callers only pass trusted values
    if ($wrapper) {
        return "<$wrapper class=\"code $language\">$highlighted_code</$wrapper>";
    } else {
        return $highlighted_code;
    }
}
718
719