xref: /dokuwiki/inc/parserutils.php (revision 52b35d3ab15e4a042a2e1c5c8c5766fc76c534af)
1<?php
2/**
3 * Utilities for accessing the parser
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Harry Fuecks <hfuecks@gmail.com>
7 * @author     Andreas Gohr <andi@splitbrain.org>
8 */
9
10if(!defined('DOKU_INC')) die('meh.');
11require_once(DOKU_INC.'inc/confutils.php');
12require_once(DOKU_INC.'inc/pageutils.php');
13require_once(DOKU_INC.'inc/pluginutils.php');
14require_once(DOKU_INC.'inc/cache.php');
15
/**
 * Renders the wikitext of a page (optionally an old revision) to XHTML.
 *
 * The global $ID is set to $id while rendering and restored afterwards.
 * The current version goes through p_cached_output(); an old revision is
 * always parsed and rendered fresh.
 *
 * If the page file does not exist and $excuse is true, the localized
 * 'norev' text (revision given) or 'newpage' text (no revision) is
 * returned; with $excuse false the result is an empty string.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id      wiki page id
 * @param  mixed  $rev     revision passed on to wikiFN(); empty for the current version
 * @param  bool   $excuse  return an explanation if the file wasn't found
 * @return string          whatever the renderer produced, or '' if nothing was rendered
 */
function p_wiki_xhtml($id, $rev='', $excuse=true){
    global $ID;

    $file = wikiFN($id,$rev);
    $html = '';

    // the parser expects the page id in the global $ID
    $previousId = $ID;
    $ID         = $id;

    if(@file_exists($file)){
        if($rev){
            // no caching on old revisions
            $instructions = p_get_instructions(io_readWikiPage($file,$id,$rev));
            $html = p_render('xhtml',$instructions,$info);
        }else{
            $html = p_cached_output($file,'xhtml',$id);
        }
    }elseif($excuse){
        $html = p_locale_xhtml($rev ? 'norev' : 'newpage');
    }

    // hand the caller's ID back (just in case)
    $ID = $previousId;

    return $html;
}
52
/**
 * Returns starting summary for a page (e.g. the first few
 * paragraphs), marked up in XHTML.
 *
 * If $excuse is true an explanation is returned if the file
 * wasn't found (and $title is left untouched). If $excuse is false and
 * the file is missing, an empty string is returned and $title is set
 * to $id.
 *
 * The global $ID is set to $id while rendering and restored afterwards.
 *
 * @param  string $id      wiki page id
 * @param  string $title   (reference) populated with page title from heading or page id
 * @param  mixed  $rev     revision passed on to wikiFN(); empty for the current version
 * @param  bool   $excuse  return an explanation if the file wasn't found
 * @return string          output of the 'xhtmlsummary' renderer, or the explanation text
 * @deprecated
 * @author Harry Fuecks <hfuecks@gmail.com>
 */
function p_wiki_xhtml_summary($id, &$title, $rev='', $excuse=true){
    global $ID;

    $file = wikiFN($id,$rev);

    // Both are initialised up front: when the file is missing and no excuse
    // is wanted, nothing below assigns them, and p_render() does not touch
    // $info when it is handed null instructions.
    $ins  = null;
    $info = array();

    //ensure $id is in global $ID (needed for parsing)
    $keep = $ID;
    $ID   = $id;

    if(@file_exists($file)){
        if($rev){
            //no caching on old revisions
            $ins = p_get_instructions(io_readWikiPage($file,$id,$rev));
        }else{
            // The XHTML for a summary is not cached so use the instruction cache
            $ins = p_cached_instructions($file);
        }
    }elseif($excuse){
        $ret = p_locale_xhtml($rev ? 'norev' : 'newpage');
        //restore ID (just in case)
        $ID = $keep;
        return $ret;
    }

    $ret = p_render('xhtmlsummary',$ins,$info);

    // fall back to the page id when the renderer reported no title
    $title = !empty($info['sum_pagetitle']) ? $info['sum_pagetitle'] : $id;

    $ID = $keep;
    return $ret;
}
109
/**
 * Returns the localized text with the given id, parsed to the default
 * output format of p_cached_output().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id  id of the locale text, resolved through localeFN()
 * @return string      parsed output
 */
function p_locale_xhtml($id){
    // resolve the locale file, then let the render cache do the work
    $localeFile = localeFN($id);
    return p_cached_output($localeFile);
}
120
/**
 *     *** DEPRECATED ***
 *
 * use p_cached_output()
 *
 * Returns the given file parsed with the default format of
 * p_cached_output(); all caching is handled there.
 *
 * @deprecated
 * @author Andreas Gohr <andi@splitbrain.org>
 * @todo   rewrite to use mode instead of hardcoded XHTML
 *
 * @param  string $file  path of the file to parse
 * @return string        parsed output
 */
function p_cached_xhtml($file){
    // kept only for backward compatibility - simply forwards the call
    $output = p_cached_output($file);
    return $output;
}
137
/**
 * Returns the given file parsed into the requested output format
 *
 * A usable render cache is returned directly. Otherwise the file is
 * rendered from its (cached) instructions and the result is stored in the
 * cache if the renderer allows it ($info['cache']), or the cache file is
 * removed if it does not.
 *
 * With $conf['allowdebug'] set and the 'xhtml' format, an HTML comment
 * describing the cache usage is appended to the output.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param  string $file    path of the file to render
 * @param  string $format  output format / renderer mode
 * @param  string $id      page id handed to the cache and instruction layer
 * @return string          rendered output
 */
function p_cached_output($file, $format='xhtml', $id='') {
    global $conf;

    $cache = new cache_renderer($id, $file, $format);

    if (!$cache->useCache()) {
        // nothing usable in the cache - render from the instructions
        $parsed = p_render($format, p_cached_instructions($file,false,$id), $info);

        if (!$info['cache']) {
            // the renderer forbids caching: try to delete the cachefile
            $cache->removeCache();
            if($conf['allowdebug'] && $format=='xhtml') {
                $parsed .= "\n<!-- no cachefile used, caching forbidden -->\n";
            }
        } else {
            // save cachefile
            $cache->storeCache($parsed);
            if($conf['allowdebug'] && $format=='xhtml') {
                $parsed .= "\n<!-- no cachefile used, but created {$cache->cache} -->\n";
            }
        }

        return $parsed;
    }

    // cache hit
    $parsed = $cache->retrieveCache(false);
    if($conf['allowdebug'] && $format=='xhtml') {
        $parsed .= "\n<!-- cachefile {$cache->cache} used -->\n";
    }

    return $parsed;
}
165
/**
 * Returns the render instructions for a file
 *
 * Instructions come from the serialized instruction cache when
 * $cacheonly is set, when the cache is usable, or when this file's
 * instructions were already rebuilt and stored earlier in the same run.
 * Otherwise the file is parsed and the result is written to the cache.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $file       path of the file to get instructions for
 * @param  bool   $cacheonly  only ever read from the cache, never parse
 * @param  string $id         page id handed to the cache and the page reader
 * @return mixed              the instructions; null if the file does not exist
 */
function p_cached_instructions($file,$cacheonly=false,$id='') {
    global $conf;

    // files whose instructions were rebuilt and stored during this run
    static $run = array();

    $cache = new cache_instructions($id, $file);

    $readFromCache = $cacheonly || $cache->useCache() || isset($run[$file]);
    if ($readFromCache) {
        return $cache->retrieveCache();
    }

    if (!@file_exists($file)) {
        return null;
    }

    // no cache - do some work
    $ins = p_get_instructions(io_readWikiPage($file,$id));
    if (!$cache->storeCache($ins)) {
        msg('Unable to save cache file. Hint: disk full; file permissions; safe_mode setting.',-1);
    } else {
        // we won't rebuild these instructions in the same run again
        $run[$file] = true;
    }

    return $ins;
}
195
/**
 * Turns wikitext into a list of instructions
 *
 * A parser with a fresh handler is set up with every mode returned by
 * p_get_parsermodes(). The PARSER_WIKITEXT_PREPROCESS event is triggered
 * with the text before it is parsed.
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $text  raw wikitext
 * @return mixed         result of Doku_Parser::parse()
 */
function p_get_instructions($text){

    // fetch the modes first: p_get_parsermodes() also loads the parser classes
    $modeList = p_get_parsermodes();

    // parser with its handler attached
    $parser = new Doku_Parser();
    $parser->Handler = new Doku_Handler();

    // register every syntax mode
    foreach($modeList as $entry){
        $parser->addMode($entry['mode'],$entry['obj']);
    }

    // let plugins look at the raw text, then parse it
    trigger_event('PARSER_WIKITEXT_PREPROCESS', $text);
    return $parser->parse($text);
}
223
/**
 * returns the metadata of a page
 *
 * Only the 'current' part of the metadata is exposed. $key may name a
 * top-level element ('title') or, separated by a space, an element and a
 * sub-element ('description abstract').
 *
 * With $render set, metadata that has never been rendered (no
 * description abstract yet) is rendered and saved first, but only for
 * existing pages and only if rendering actually produced a result.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 *
 * @param  string      $id      wiki page id
 * @param  string|bool $key     metadata key, optionally "key subkey"; false for everything
 * @param  bool        $render  render the metadata if it is not available yet
 * @return mixed                the requested value, null if the key is not set,
 *                              or the whole 'current' array when no key is given
 */
function p_get_metadata($id, $key=false, $render=false){
    global $ID, $INFO, $cache_metadata;

    // cache the current page
    // Benchmarking shows the current page's metadata is generally the only page metadata
    // accessed several times. This may catch a few other pages, but that shouldn't be an issue.
    $cache = ($ID == $id);
    $meta = p_read_metadata($id, $cache);

    // metadata has never been rendered before - do it! (but not for non-existent pages)
    if ($render && !isset($meta['current']['description']['abstract']) && page_exists($id)){
        $rendered = p_render_metadata($id, $meta);

        // p_render_metadata() returns null when no instructions are available;
        // saving that would overwrite the stored metadata with a serialized null
        if (is_array($rendered)) {
            $meta = $rendered;
            io_saveFile(metaFN($id, '.meta'), serialize($meta));

            // sync cached copies, including $INFO metadata
            if (!empty($cache_metadata[$id])) $cache_metadata[$id] = $meta;
            if (!empty($INFO) && ($id == $INFO['id'])) { $INFO['meta'] = $meta['current']; }
        }
    }

    // filter by $key
    if ($key){
        // a key without a space has no second part - don't read a missing offset
        $parts  = explode(' ', $key, 2);
        $key    = $parts[0];
        $subkey = isset($parts[1]) ? trim($parts[1]) : '';

        if ($subkey) {
            return isset($meta['current'][$key][$subkey]) ? $meta['current'][$key][$subkey] : null;
        }

        return isset($meta['current'][$key]) ? $meta['current'][$key] : null;
    }

    return $meta['current'];
}
262
/**
 * sets metadata elements of a page
 *
 * The passed elements are written into the 'current' metadata and, with
 * $persistent set, into the 'persistent' metadata as well. Sub-arrays of
 * 'relation' and the protected keys ('description', 'date',
 * 'contributor') are merged with existing values instead of replaced;
 * non-array values for those keys are ignored.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 *
 * @param  string $id          wiki page id
 * @param  array  $data        metadata elements to set, keyed by element name
 * @param  bool   $render      render the page's metadata before applying $data
 * @param  bool   $persistent  also store the elements in the persistent metadata
 * @return bool                false if $data is no array, true if nothing changed,
 *                             otherwise the result of io_saveFile()
 */
function p_set_metadata($id, $data, $render=false, $persistent=true){
    if (!is_array($data)) return false;

    global $ID;

    // cache the current page
    $cache = ($ID == $id);
    $orig = p_read_metadata($id, $cache);

    // render metadata first?
    $meta = $orig;
    if ($render) {
        $rendered = p_render_metadata($id, $orig);
        // p_render_metadata() returns null when no instructions are available;
        // keep the stored metadata then instead of building on nothing
        if (is_array($rendered)) $meta = $rendered;
    }

    // now add the passed metadata
    // (strict comparisons: on PHP < 8 an integer key 0 loosely equals any of these names)
    $protected = array('description', 'date', 'contributor');
    foreach ($data as $key => $value){

        // be careful with sub-arrays of $meta['relation']
        if ($key === 'relation'){

            // like the protected keys, a legitimate value must be an array
            if (!is_array($value)) continue;

            foreach ($value as $subkey => $subvalue){
                $meta['current'][$key][$subkey] = !empty($meta['current'][$key][$subkey]) ? array_merge($meta['current'][$key][$subkey], $subvalue) : $subvalue;
                if ($persistent)
                    $meta['persistent'][$key][$subkey] = !empty($meta['persistent'][$key][$subkey]) ? array_merge($meta['persistent'][$key][$subkey], $subvalue) : $subvalue;
            }

            // be careful with some sensitive arrays of $meta
        } elseif (in_array($key, $protected, true)){

            // these keys, must have subkeys - a legitimate value must be an array
            if (is_array($value)) {
                $meta['current'][$key] = !empty($meta['current'][$key]) ? array_merge($meta['current'][$key],$value) : $value;

                if ($persistent) {
                    $meta['persistent'][$key] = !empty($meta['persistent'][$key]) ? array_merge($meta['persistent'][$key],$value) : $value;
                }
            }

            // no special treatment for the rest
        } else {
            $meta['current'][$key] = $value;
            if ($persistent) $meta['persistent'][$key] = $value;
        }
    }

    // save only if metadata changed
    if ($meta == $orig) return true;

    // sync cached copies, including $INFO metadata
    global $cache_metadata, $INFO;

    if (!empty($cache_metadata[$id])) $cache_metadata[$id] = $meta;
    if (!empty($INFO) && ($id == $INFO['id'])) { $INFO['meta'] = $meta['current']; }

    return io_saveFile(metaFN($id, '.meta'), serialize($meta));
}
323
/**
 * Purges the non-persistent part of the meta data
 * used on page deletion
 *
 * Every element of the 'current' metadata is reset to an empty value of
 * its own kind (array() for arrays, '' for everything else); the
 * 'persistent' part is saved unchanged.
 *
 * @author Michael Klier <chi@chimeric.de>
 *
 * @param  string $id  wiki page id
 * @return mixed       result of io_saveFile()
 */
function p_purge_metadata($id) {
    $meta = p_read_metadata($id);
    foreach($meta['current'] as $key => $value) {
        // test the element itself: $meta[$key] would look at the top level
        // ('current'/'persistent') and never find the element
        $meta['current'][$key] = is_array($value) ? array() : '';
    }
    return io_saveFile(metaFN($id, '.meta'), serialize($meta));
}
342
/**
 * read the metadata from source/cache for $id
 * (internal use only - called by p_get_metadata & p_set_metadata)
 *
 * this function also converts the metadata from the original format to
 * the current format ('current' & 'persistent' arrays)
 *
 * A metadata file that is missing, or whose content does not unserialize
 * to an array, yields empty 'current' and 'persistent' arrays.
 *
 * @author   Christopher Smith <chris@jalakai.co.uk>
 *
 * @param    string   $id      absolute wiki page id
 * @param    bool     $cache   whether or not to cache metadata in memory
 *                             (only use for metadata likely to be accessed several times)
 *
 * @return   array             metadata
 */
function p_read_metadata($id,$cache=false) {
    global $cache_metadata;

    $cacheKey = (string)$id;
    if (isset($cache_metadata[$cacheKey])) return $cache_metadata[$cacheKey];

    $file = metaFN($id, '.meta');
    $meta = @file_exists($file) ? unserialize(io_readFile($file, false)) : false;

    // unserialize() gives false for empty or corrupt content; treat that like a
    // missing file rather than converting and saving the bogus value below
    if (!is_array($meta)) {
        $meta = array('current'=>array(),'persistent'=>array());
    }

    // convert $meta from old format to new (current+persistent) format
    if (!isset($meta['current'])) {
        $meta = array('current'=>$meta,'persistent'=>$meta);

        // remove non-persistent keys
        unset($meta['persistent']['title']);
        unset($meta['persistent']['description']['abstract']);
        unset($meta['persistent']['description']['tableofcontents']);
        unset($meta['persistent']['relation']['haspart']);
        unset($meta['persistent']['relation']['references']);
        unset($meta['persistent']['date']['valid']);

        if (empty($meta['persistent']['description'])) unset($meta['persistent']['description']);
        if (empty($meta['persistent']['relation'])) unset($meta['persistent']['relation']);
        if (empty($meta['persistent']['date'])) unset($meta['persistent']['date']);

        // save converted metadata
        io_saveFile($file, serialize($meta));
    }

    if ($cache) {
        $cache_metadata[$cacheKey] = $meta;
    }

    return $meta;
}
392
/**
 * renders the metadata of a page
 *
 * Runs the page's instructions through the metadata renderer, wrapped in
 * the PARSER_METADATA_RENDER event. The global $ID is set to $id while
 * rendering and restored before returning.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 *
 * @param  string $id    wiki page id
 * @param  array  $orig  existing metadata ('current' and 'persistent' parts)
 * @return mixed         the event result; null if no instructions were available
 */
function p_render_metadata($id, $orig){
    global $ID;

    // make sure the correct ID is in global ID
    $savedId = $ID;
    $ID      = $id;

    // add an extra key for the event - to tell event handlers the page whose metadata this is
    $orig['page'] = $id;
    $event = new Doku_Event('PARSER_METADATA_RENDER', $orig);

    if ($event->advise_before()) {

        require_once DOKU_INC."inc/parser/metadata.php";

        // without instructions there is nothing to render
        $instructions = p_cached_instructions(wikiFN($id),false,$id);
        if(is_null($instructions)){
            $ID = $savedId;
            return null;
        }

        // the renderer starts from the existing metadata
        $renderer = new Doku_Renderer_metadata();
        $renderer->meta       = $orig['current'];
        $renderer->persistent = $orig['persistent'];

        // each instruction is a renderer method name plus its arguments
        foreach ($instructions as $step){
            call_user_func_array(array(&$renderer, $step[0]), (array) $step[1]);
        }

        $event->result = array(
            'current'    => $renderer->meta,
            'persistent' => $renderer->persistent,
        );
    }
    $event->advise_after();

    $ID = $savedId;
    return $event->result;
}
436
/**
 * returns all available parser syntax modes in correct order
 *
 * Collects syntax plugins, the standard modes, the formatting modes, the
 * modes that need configuration files and the optional camelcase mode,
 * then sorts them with p_sort_modes(). The result is kept in a static
 * variable and reused on later calls.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @return array  list of arrays with the keys 'sort', 'mode' and 'obj'
 */
function p_get_parsermodes(){
    global $conf;

    //reuse old data
    static $modes = null;
    if($modes != null){
        return $modes;
    }

    //import parser classes and mode definitions
    require_once DOKU_INC . 'inc/parser/parser.php';

    // all syntax modes and their objects are collected here first, then
    // sorted so they get added to the parser in the correct order
    $modes = array();

    // syntax plugins
    $pluginlist = plugin_list('syntax');
    if(count($pluginlist)){
        global $PARSER_MODES;
        $obj = null;
        foreach($pluginlist as $plugin){
            //attempt to load plugin into $obj
            if(!$obj =& plugin_load('syntax',$plugin)) continue;

            $modeName = "plugin_$plugin";
            $PARSER_MODES[$obj->getType()][] = $modeName; //register mode type
            $modes[] = array(
                    'sort' => $obj->getSort(),
                    'mode' => $modeName,
                    'obj'  => $obj,
                    );
            unset($obj); //remove the reference
        }
    }

    // default modes, typography ones only when enabled
    $builtin = array('listblock','preformatted','notoc','nocache',
            'header','table','linebreak','footnote','hr',
            'unformatted','php','html','code','file','quote',
            'internallink','rss','media','externallink',
            'emaillink','windowssharelink','eol');
    if($conf['typography']){
        array_push($builtin, 'quotes', 'multiplyentity');
    }
    foreach($builtin as $name){
        $class = 'Doku_Parser_Mode_'.$name;
        $obj   = new $class();
        $modes[] = array('sort' => $obj->getSort(), 'mode' => $name, 'obj' => $obj);
    }

    // formatting modes all share one class
    $formatting = array('strong','emphasis','underline','monospace',
            'subscript','superscript','deleted');
    foreach($formatting as $name){
        $obj     = new Doku_Parser_Mode_formatting($name);
        $modes[] = array('sort' => $obj->getSort(), 'mode' => $name, 'obj' => $obj);
    }

    // modes which need files
    $obj     = new Doku_Parser_Mode_smiley(array_keys(getSmileys()));
    $modes[] = array('sort' => $obj->getSort(), 'mode' => 'smiley', 'obj' => $obj);

    $obj     = new Doku_Parser_Mode_acronym(array_keys(getAcronyms()));
    $modes[] = array('sort' => $obj->getSort(), 'mode' => 'acronym', 'obj' => $obj);

    $obj     = new Doku_Parser_Mode_entity(array_keys(getEntities()));
    $modes[] = array('sort' => $obj->getSort(), 'mode' => 'entity', 'obj' => $obj);

    // optional camelcase mode
    if($conf['camelcase']){
        $obj     = new Doku_Parser_Mode_camelcaselink();
        $modes[] = array('sort' => $obj->getSort(), 'mode' => 'camelcaselink', 'obj' => $obj);
    }

    //sort modes
    usort($modes,'p_sort_modes');

    return $modes;
}
527
/**
 * Callback function for usort
 *
 * Orders two mode entries by their 'sort' value, ascending.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  array $a  mode entry with a 'sort' key
 * @param  array $b  mode entry with a 'sort' key
 * @return int       0 if both sort values are equal, -1 if $a sorts first, 1 otherwise
 */
function p_sort_modes($a, $b){
    $left  = $a['sort'];
    $right = $b['sort'];

    if($left == $right){
        return 0;
    }
    if($left < $right){
        return -1;
    }
    return 1;
}
537
/**
 * Renders a list of instructions to the specified output mode
 *
 * Information from the renderer is returned in the $info array. $info is
 * left untouched when the function returns early (null instructions or no
 * renderer available).
 *
 * The RENDERER_CONTENT_POSTPROCESS event is triggered with the mode and a
 * reference to the rendered document before the document is returned.
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $mode          output mode, resolved by p_get_renderer()
 * @param  array  $instructions  list of array(method name, arguments) entries
 * @param  array  $info          (reference) receives the renderer's info array
 * @return string|null           the rendered document; '' for null instructions,
 *                               null if no renderer could be loaded
 */
function p_render($mode,$instructions,&$info){
    if(is_null($instructions)) return '';

    $Renderer =& p_get_renderer($mode);
    if (is_null($Renderer)) return null;

    $Renderer->reset();

    $Renderer->smileys = getSmileys();
    $Renderer->entities = getEntities();
    $Renderer->acronyms = getAcronyms();
    $Renderer->interwiki = getInterwiki();

    // Loop through the instructions
    foreach ( $instructions as $instruction ) {
        // Execute the callback against the Renderer; the cast keeps the call
        // valid when an instruction carries no argument array, matching how
        // p_render_metadata() invokes its renderer
        call_user_func_array(array(&$Renderer, $instruction[0]), (array) $instruction[1]);
    }

    //set info array
    $info = $Renderer->info;

    // Post process and return the output
    $data = array($mode,& $Renderer->doc);
    trigger_event('RENDERER_CONTENT_POSTPROCESS',$data);
    return $Renderer->doc;
}
573
/**
 * Loads the renderer for the given output mode
 *
 * The renderer name is $conf['renderer_<mode>'] if that is set, otherwise
 * the mode itself. A renderer file in inc/parser/ is tried first, then a
 * renderer plugin of that name (unless the plugin is disabled). Failures
 * are reported through msg().
 *
 * @param  string $mode  output mode
 * @return object|null   (by reference) the renderer, or null if none could be loaded
 */
function & p_get_renderer($mode) {
    global $conf, $plugin_controller;

    // failure paths return this variable: a function returning by reference
    // must return a variable, a literal null raises a notice
    $none = null;

    $rname = !empty($conf['renderer_'.$mode]) ? $conf['renderer_'.$mode] : $mode;

    // try default renderer first:
    $file = DOKU_INC."inc/parser/$rname.php";
    if(@file_exists($file)){
        require_once $file;
        $rclass = "Doku_Renderer_$rname";

        if ( !class_exists($rclass) ) {
            trigger_error("Unable to resolve render class $rclass",E_USER_WARNING);
            msg("Renderer '$rname' for $mode not valid",-1);
            return $none;
        }
        $Renderer = new $rclass();
    }else{
        // Maybe a plugin/component is available?
        $Renderer = null; // stays null when the plugin is disabled
        list($plugin, $component) = $plugin_controller->_splitName($rname);
        if (!$plugin_controller->isdisabled($plugin)){
            $Renderer =& $plugin_controller->load('renderer',$rname, true);
        }

        if(is_null($Renderer)){
            msg("No renderer '$rname' found for mode '$mode'",-1);
            return $none;
        }
    }

    return $Renderer;
}
606
/**
 * Gets the first heading of a page, as stored in its 'title' metadata
 *
 * @param   string   $id       dokuwiki page id
 * @param   bool     $render   rerender if first heading not known
 *                             default: true  -- must be set to false for calls from the metadata renderer to
 *                                               protect against loops and excessive resource usage when pages
 *                                               for which only a first heading is required will attempt to
 *                                               render metadata for all the pages for which they require first
 *                                               headings ... and so on.
 * @return  mixed              the 'title' metadata value, null if it is not set
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_get_first_heading($id, $render=true){
    // the heading is simply the page's 'title' metadata element
    $heading = p_get_metadata($id,'title',$render);
    return $heading;
}
623
/**
 * Wrapper for GeSHi Code Highlighter, provides caching of its output
 *
 * The highlighted code is cached in a file keyed on language and code.
 * The cache is ignored when a 'purge' request parameter is set, or when
 * it is older than geshi.php, the language definition or the main default
 * configuration file.
 *
 * @param  string   $code       source code to be highlighted
 * @param  string   $language   language to provide highlighting
 * @param  string   $wrapper    html element to wrap the returned highlighted text
 * @return string               highlighted code, wrapped in $wrapper if one is given
 *
 * @author Christopher Smith <chris@jalakai.co.uk>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_xhtml_cached_geshi($code, $language, $wrapper='pre') {
    global $conf, $config_cascade;
    // NOTE(review): $language presumably originates from wiki markup and is also
    // used to build a file path below - confirm that callers restrict it
    $language = strtolower($language);

    // remove any leading or trailing blank lines
    $code = preg_replace('/^\s*?\n|\s*?\n$/','',$code);

    $cache = getCacheName($language.$code,".code");
    $ctime = @filemtime($cache);
    // empty() rather than a bare read: 'purge' is normally absent from the request
    if($ctime && empty($_REQUEST['purge']) &&
            $ctime > filemtime(DOKU_INC.'inc/geshi.php') &&                 // geshi changed
            $ctime > @filemtime(DOKU_INC.'inc/geshi/'.$language.'.php') &&  // language syntax definition changed
            $ctime > filemtime(reset($config_cascade['main']['default']))){ // dokuwiki changed
        $highlighted_code = io_readFile($cache, false);

    } else {

        require_once(DOKU_INC . 'inc/geshi.php');

        $geshi = new GeSHi($code, $language, DOKU_INC . 'inc/geshi');
        $geshi->set_encoding('utf-8');
        $geshi->enable_classes();
        $geshi->set_header_type(GESHI_HEADER_PRE);
        $geshi->set_link_target($conf['target']['extern']);

        // remove GeSHi's wrapper element (we'll replace it with our own later)
        // we need to use a GeSHi wrapper to avoid <BR> throughout the highlighted text
        $highlighted_code = trim(preg_replace('!^<pre[^>]*>|</pre>$!','',$geshi->parse_code()),"\n\r");
        io_saveFile($cache,$highlighted_code);
    }

    // add a wrapper element if required
    if ($wrapper) {
        // escape the language so it cannot break out of the class attribute;
        // plain language names pass through unchanged
        $class = htmlspecialchars($language, ENT_QUOTES, 'UTF-8');
        return "<$wrapper class=\"code $class\">$highlighted_code</$wrapper>";
    }

    return $highlighted_code;
}
672
673