xref: /dokuwiki/inc/parserutils.php (revision 2365d73d76c039bedafc4a250fa0d584296f87d3)
1<?php
2/**
3 * Utilities for accessing the parser
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Harry Fuecks <hfuecks@gmail.com>
7 * @author     Andreas Gohr <andi@splitbrain.org>
8 */
9
10if(!defined('DOKU_INC')) die('meh.');
11require_once(DOKU_INC.'inc/confutils.php');
12require_once(DOKU_INC.'inc/pageutils.php');
13require_once(DOKU_INC.'inc/pluginutils.php');
14require_once(DOKU_INC.'inc/cache.php');
15
/**
 * Renders a wiki page (optionally one of its old revisions) to XHTML.
 *
 * While rendering, the global $ID is switched to $id and restored
 * afterwards. Old revisions are parsed and rendered directly, the
 * current revision is served through p_cached_output().
 *
 * @param string $id      page id
 * @param string $rev     revision to render; empty for the current version
 * @param bool   $excuse  if true, a localized explanation ('norev' or
 *                        'newpage') is returned when the file is missing
 * @return string renderer output; '' if the file is missing and $excuse is false
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_wiki_xhtml($id, $rev='', $excuse=true){
  global $ID;

  $file = wikiFN($id,$rev);
  $html = '';

  // parsing needs the page id in the global $ID
  $previous = $ID;
  $ID       = $id;

  if(!@file_exists($file)){
    // nothing to render - explain why, if requested
    if($excuse) $html = p_locale_xhtml($rev ? 'norev' : 'newpage');
  }elseif($rev){
    // old revisions are rendered without any caching
    $text = io_readWikiPage($file,$id,$rev);
    $html = p_render('xhtml',p_get_instructions($text),$info);
  }else{
    $html = p_cached_output($file,'xhtml',$id);
  }

  // put the previous ID back (just in case)
  $ID = $previous;

  return $html;
}
52
/**
 * Returns the starting summary of a page (e.g. the first few
 * paragraphs), rendered with the 'xhtmlsummary' renderer.
 *
 * While rendering, the global $ID is switched to $id and restored
 * afterwards.
 *
 * @param string $id      wiki page id
 * @param string $title   (reference) set to the 'sum_pagetitle' reported by
 *                        the renderer, or to $id if none was reported; left
 *                        untouched when an excuse text is returned
 * @param string $rev     revision to render; empty for the current version
 * @param bool   $excuse  if true, a localized explanation ('norev' or
 *                        'newpage') is returned when the file is missing
 * @return string renderer output; '' if the file is missing and $excuse is false
 *
 * @deprecated
 * @author Harry Fuecks <hfuecks@gmail.com>
 */
function p_wiki_xhtml_summary($id, &$title, $rev='', $excuse=true){
  global $ID;

  $file = wikiFN($id,$rev);
  $ins  = null;    // stays null when there is no file to parse
  $info = array(); // filled by p_render() when something was rendered

  //ensure $id is in global $ID (needed for parsing)
  $keep = $ID;
  $ID   = $id;

  if(@file_exists($file)){
    if($rev){
      //no caching on old revisions
      $ins = p_get_instructions(io_readWikiPage($file,$id,$rev));
    }else{
      // The XHTML for a summary is not cached so use the instruction cache
      $ins = p_cached_instructions($file);
    }
  }elseif($excuse){
    $ret = p_locale_xhtml($rev ? 'norev' : 'newpage');
    //restore ID (just in case)
    $ID = $keep;
    return $ret;
  }

  // p_render() returns '' for null instructions and leaves $info untouched
  $ret = p_render('xhtmlsummary',$ins,$info);

  $title = !empty($info['sum_pagetitle']) ? $info['sum_pagetitle'] : $id;

  $ID = $keep;
  return $ret;
}
109
/**
 * Returns a localized text, looked up via localeFN() and passed
 * through p_cached_output()
 *
 * @param string $id  name of the locale text
 * @return string output of p_cached_output() for the locale file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_locale_xhtml($id){
  $file = localeFN($id);
  return p_cached_output($file);
}
120
/**
 *     *** DEPRECATED ***
 *
 * Call p_cached_output() instead; this function only forwards to it.
 *
 * Returns the given file parsed to XHTML, using and creating a
 * cachefile as p_cached_output() does.
 *
 * @param string $file  path of the file to render
 * @return string output of p_cached_output() for $file
 *
 * @deprecated
 * @author Andreas Gohr <andi@splitbrain.org>
 * @todo   rewrite to use mode instead of hardcoded XHTML
 */
function p_cached_xhtml($file){
  $xhtml = p_cached_output($file);
  return $xhtml;
}
137
/**
 * Returns the given file rendered into the requested output format
 *
 * A usable renderer cache is returned as is. Otherwise the file is
 * rendered from its (cached) instructions; the result is stored in the
 * cache when the renderer's info allows caching, else the cache file is
 * removed. With $conf['allowdebug'] set, XHTML output gets an HTML
 * comment appended that tells which of the three paths was taken.
 *
 * @param string $file    path of the file to render
 * @param string $format  output format (renderer mode)
 * @param string $id      page id, passed on to the cache and the parser
 * @return string rendered output
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Chris Smith <chris@jalakai.co.uk>
 */
function p_cached_output($file, $format='xhtml', $id='') {
  global $conf;

  $cache = new cache_renderer($id, $file, $format);

  if ($cache->useCache()) {
    $parsed = $cache->retrieveCache(false);
    $note   = "\n<!-- cachefile {$cache->cache} used -->\n";
  } else {
    $instructions = p_cached_instructions($file,false,$id);
    $parsed       = p_render($format, $instructions, $info);

    if ($info['cache']) {
      //save cachefile
      $cache->storeCache($parsed);
      $note = "\n<!-- no cachefile used, but created {$cache->cache} -->\n";
    } else {
      //try to delete cachefile
      $cache->removeCache();
      $note = "\n<!-- no cachefile used, caching forbidden -->\n";
    }
  }

  // the debug note is only ever added to XHTML output
  if ($conf['allowdebug'] && $format=='xhtml') {
    $parsed .= $note;
  }

  return $parsed;
}
165
/**
 * Returns the render instructions for a file
 *
 * Reads from and writes to the instruction cache. A file whose
 * instructions were built and stored during this request is not
 * parsed a second time.
 *
 * @param string $file       path of the file to parse
 * @param bool   $cacheonly  if true, only return what the cache holds
 * @param string $id         page id, passed on to the cache and the reader
 * @return mixed instructions, or null if the file does not exist
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_cached_instructions($file,$cacheonly=false,$id='') {
  // files already parsed and cached in this run
  static $built = array();

  $cache = new cache_instructions($id, $file);

  if ($cacheonly || $cache->useCache() || isset($built[$file])) {
    return $cache->retrieveCache();
  }

  if (!@file_exists($file)) {
    return null;
  }

  // no cache - do some work
  $instructions = p_get_instructions(io_readWikiPage($file,$id));
  if ($cache->storeCache($instructions)) {
    // remember it, so it is not rebuilt in the same run again
    $built[$file] = true;
  } else {
    msg('Unable to save cache file. Hint: disk full; file permissions; safe_mode setting.',-1);
  }

  return $instructions;
}
195
/**
 * Turns wiki text into a list of instructions
 *
 * Sets up a Doku_Parser with a Doku_Handler and all modes returned by
 * p_get_parsermodes(), fires the PARSER_WIKITEXT_PREPROCESS event with
 * the text and then parses it.
 *
 * @param string $text  raw wiki text
 * @return mixed whatever Doku_Parser::parse() returns for the text
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_get_instructions($text){

  $modes = p_get_parsermodes();

  // Create the parser
  // (plain assignment: "= & new" is deprecated since PHP 5.3 and a
  // parse error from PHP 7 on)
  $Parser = new Doku_Parser();

  // Add the Handler
  $Parser->Handler = new Doku_Handler();

  //add modes to parser
  foreach($modes as $mode){
    $Parser->addMode($mode['mode'],$mode['obj']);
  }

  // Do the parsing
  trigger_event('PARSER_WIKITEXT_PREPROCESS', $text);
  return $Parser->parse($text);
}
223
/**
 * Returns the metadata of a page
 *
 * @param string $id      page id
 * @param string $key     name of the metadata value to return; a subkey can
 *                        be given after a space ("key subkey"); false
 *                        returns the whole 'current' metadata array
 * @param bool   $render  render the metadata first if the page exists and
 *                        has no abstract stored yet
 * @return mixed the requested value, null if the key is not set, or the
 *               whole 'current' array when no key was given
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 */
function p_get_metadata($id, $key=false, $render=false){
  global $ID, $INFO, $cache_metadata;

  // cache the current page
  // Benchmarking shows the current page's metadata is generally the only page metadata
  // accessed several times. This may catch a few other pages, but that shouldn't be an issue.
  $cache = ($ID == $id);
  $meta = p_read_metadata($id, $cache);

  // metadata has never been rendered before - do it! (but not for non-existent pages)
  if ($render && empty($meta['current']['description']['abstract']) && page_exists($id)){
    $rendered = p_render_metadata($id, $meta);

    // p_render_metadata() returns null when no instructions are available;
    // keep what we have instead of overwriting the file with nothing
    if (is_array($rendered)) {
      $meta = $rendered;
      io_saveFile(metaFN($id, '.meta'), serialize($meta));

      // sync cached copies, including $INFO metadata
      if (!empty($cache_metadata[$id])) $cache_metadata[$id] = $meta;
      if (!empty($INFO) && ($id == $INFO['id'])) { $INFO['meta'] = $meta['current']; }
    }
  }

  // filter by $key
  if ($key){
    // "key subkey" - the subkey part is optional
    $parts  = explode(' ', $key, 2);
    $key    = $parts[0];
    $subkey = isset($parts[1]) ? trim($parts[1]) : '';

    if ($subkey) {
      return isset($meta['current'][$key][$subkey]) ? $meta['current'][$key][$subkey] : null;
    }

    return isset($meta['current'][$key]) ? $meta['current'][$key] : null;
  }

  return $meta['current'];
}
262
/**
 * Sets metadata elements of a page
 *
 * The passed values are written to the 'current' metadata and, if
 * $persistent is set, to the 'persistent' metadata as well:
 *  - 'relation': every subkey is merged with the existing subkey array
 *  - 'description', 'date', 'contributor': the value must be an array and
 *    is merged with the existing array; other values are ignored
 *  - all other keys: the value replaces the existing one
 *
 * The file is only written if the metadata actually changed.
 *
 * @param string $id          page id
 * @param array  $data        metadata to set, indexed by key
 * @param bool   $render      render the page's metadata before applying $data
 * @param bool   $persistent  also store the values in the persistent metadata
 * @return bool false if $data is not an array, true if nothing changed,
 *              otherwise the result of io_saveFile()
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 */
function p_set_metadata($id, $data, $render=false, $persistent=true){
  if (!is_array($data)) return false;

  global $ID;

  // cache the current page
  $cache = ($ID == $id);
  $orig = p_read_metadata($id, $cache);

  // render metadata first?
  $meta = $orig;
  if ($render) {
    $rendered = p_render_metadata($id, $orig);
    // rendering returns null when no instructions are available -
    // continue with the stored metadata instead of throwing it away
    if (is_array($rendered)) $meta = $rendered;
  }

  // now add the passed metadata
  $protected = array('description', 'date', 'contributor');
  foreach ($data as $key => $value){

    // be careful with sub-arrays of $meta['relation']
    if ($key == 'relation'){

      // relations are given per subkey, anything else cannot be merged
      if (!is_array($value)) continue;

      foreach ($value as $subkey => $subvalue){
        $meta['current'][$key][$subkey] = !empty($meta['current'][$key][$subkey]) ? array_merge($meta['current'][$key][$subkey], $subvalue) : $subvalue;
        if ($persistent)
          $meta['persistent'][$key][$subkey] = !empty($meta['persistent'][$key][$subkey]) ? array_merge($meta['persistent'][$key][$subkey], $subvalue) : $subvalue;
      }

    // be careful with some sensitive arrays of $meta
    } elseif (in_array($key, $protected)){

      // these keys must have subkeys - a legitimate value must be an array
      if (is_array($value)) {
        $meta['current'][$key] = !empty($meta['current'][$key]) ? array_merge($meta['current'][$key],$value) : $value;

        if ($persistent) {
          $meta['persistent'][$key] = !empty($meta['persistent'][$key]) ? array_merge($meta['persistent'][$key],$value) : $value;
        }
      }

    // no special treatment for the rest
    } else {
      $meta['current'][$key] = $value;
      if ($persistent) $meta['persistent'][$key] = $value;
    }
  }

  // save only if metadata changed
  if ($meta == $orig) return true;

  // sync cached copies, including $INFO metadata
  global $cache_metadata, $INFO;

  if (!empty($cache_metadata[$id])) $cache_metadata[$id] = $meta;
  if (!empty($INFO) && ($id == $INFO['id'])) { $INFO['meta'] = $meta['current']; }

  return io_saveFile(metaFN($id, '.meta'), serialize($meta));
}
323
/**
 * Purges the non-persistent part of the metadata
 * used on page deletion
 *
 * Every entry of the 'current' metadata is reset to an empty value of
 * its own kind (array() for arrays, '' for everything else); the
 * 'persistent' metadata is written back unchanged.
 *
 * @param string $id  page id
 * @return mixed result of io_saveFile()
 *
 * @author Michael Klier <chi@chimeric.de>
 */
function p_purge_metadata($id) {
    $meta = p_read_metadata($id);
    foreach($meta['current'] as $key => $value) {
        // look at the entry itself - $meta[$key] is a different array level
        // and would never match, turning every array entry into a string
        $meta['current'][$key] = is_array($value) ? array() : '';
    }
    return io_saveFile(metaFN($id, '.meta'), serialize($meta));
}
342
/**
 * read the metadata from source/cache for $id
 * (internal use only - called by p_get_metadata & p_set_metadata)
 *
 * this function also converts the metadata from the original format to
 * the current format ('current' & 'persistent' arrays) and saves the
 * converted data back to the file
 *
 * @author   Christopher Smith <chris@jalakai.co.uk>
 *
 * @param    string   $id      absolute wiki page id
 * @param    bool     $cache   whether or not to cache metadata in memory
 *                             (only use for metadata likely to be accessed several times)
 *
 * @return   array             metadata with the keys 'current' and 'persistent'
 */
function p_read_metadata($id,$cache=false) {
  global $cache_metadata;

  if (isset($cache_metadata[(string)$id])) return $cache_metadata[(string)$id];

  $empty = array('current'=>array(),'persistent'=>array());

  $file = metaFN($id, '.meta');
  $meta = @file_exists($file) ? unserialize(io_readFile($file, false)) : $empty;

  // unserialize() yields false for an empty or damaged file - treat that
  // like a missing file instead of converting and saving a broken value
  if (!is_array($meta)) $meta = $empty;

  // convert $meta from old format to new (current+persistent) format
  if (!isset($meta['current'])) {
    $meta = array('current'=>$meta,'persistent'=>$meta);

    // remove non-persistent keys
    unset($meta['persistent']['title']);
    unset($meta['persistent']['description']['abstract']);
    unset($meta['persistent']['description']['tableofcontents']);
    unset($meta['persistent']['relation']['haspart']);
    unset($meta['persistent']['relation']['references']);
    unset($meta['persistent']['date']['valid']);

    if (empty($meta['persistent']['description'])) unset($meta['persistent']['description']);
    if (empty($meta['persistent']['relation'])) unset($meta['persistent']['relation']);
    if (empty($meta['persistent']['date'])) unset($meta['persistent']['date']);

    // save converted metadata
    io_saveFile($file, serialize($meta));
  }

  if ($cache) {
    $cache_metadata[(string)$id] = $meta;
  }

  return $meta;
}
392
/**
 * Renders the metadata of a page
 *
 * Wrapped in the PARSER_METADATA_RENDER event. Unless the event's
 * advise_before() returns false, the page's instructions are run
 * through a Doku_Renderer_metadata that starts out with the given
 * 'current' and 'persistent' data, and its result becomes the event
 * result.
 *
 * While rendering, the global $ID is switched to $id and restored
 * afterwards.
 *
 * @param string $id    page id
 * @param array  $orig  existing metadata ('current' and 'persistent' arrays)
 * @return mixed the event result (an array with 'current' and 'persistent'
 *               after a default rendering), or null if no instructions
 *               could be obtained for the page
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 */
function p_render_metadata($id, $orig){
  // make sure the correct ID is in global ID
  global $ID;
  $keep = $ID;
  $ID   = $id;

  // add an extra key for the event - to tell event handlers the page whose metadata this is
  $orig['page'] = $id;
  $evt = new Doku_Event('PARSER_METADATA_RENDER', $orig);
  if ($evt->advise_before()) {

    require_once DOKU_INC."inc/parser/metadata.php";

    // get instructions
    $instructions = p_cached_instructions(wikiFN($id),false,$id);
    if(is_null($instructions)){
      $ID = $keep;
      return null; // something went wrong with the instructions
    }

    // set up the renderer
    // (plain assignment: "= & new" is deprecated since PHP 5.3 and a
    // parse error from PHP 7 on)
    $renderer = new Doku_Renderer_metadata();
    $renderer->meta = $orig['current'];
    $renderer->persistent = $orig['persistent'];

    // loop through the instructions
    foreach ($instructions as $instruction){
      // execute the callback against the renderer
      call_user_func_array(array(&$renderer, $instruction[0]), $instruction[1]);
    }

    $evt->result = array('current'=>$renderer->meta,'persistent'=>$renderer->persistent);
  }
  $evt->advise_after();

  $ID = $keep;
  return $evt->result;
}
437
/**
 * Returns all available parser syntax modes in correct order
 *
 * The list is built once and kept in a static variable. Each entry is
 * an array with the keys 'sort' (the mode object's getSort() value),
 * 'mode' (mode name) and 'obj' (mode object). Syntax plugins are also
 * registered in the global $PARSER_MODES under their type.
 *
 * @return array list of modes, sorted with p_sort_modes()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_get_parsermodes(){
  global $conf;

  //reuse the list from an earlier call
  static $modes = null;
  if(null != $modes){
    return $modes;
  }

  //import parser classes and mode definitions
  require_once DOKU_INC . 'inc/parser/parser.php';

  // collect every mode with its object first; sorting happens at the end
  $modes = array();

  // syntax plugins
  $plugins = plugin_list('syntax');
  if(count($plugins)){
    global $PARSER_MODES;
    $modeObj = null;
    foreach($plugins as $plugin){
      //attempt to load the plugin, skip it on failure
      if(!$modeObj =& plugin_load('syntax',$plugin)) continue;

      $name = "plugin_$plugin";
      $PARSER_MODES[$modeObj->getType()][] = $name; //register mode type
      $modes[] = array('sort' => $modeObj->getSort(), 'mode' => $name, 'obj' => $modeObj);

      unset($modeObj); //remove the reference
    }
  }

  // default modes
  $builtin = array('listblock','preformatted','notoc','nocache',
                   'header','table','linebreak','footnote','hr',
                   'unformatted','php','html','code','file','quote',
                   'internallink','rss','media','externallink',
                   'emaillink','windowssharelink','eol');
  if($conf['typography']){
    array_push($builtin, 'quotes', 'multiplyentity');
  }
  foreach($builtin as $name){
    $class   = "Doku_Parser_Mode_$name";
    $modeObj = new $class();
    $modes[] = array('sort' => $modeObj->getSort(), 'mode' => $name, 'obj' => $modeObj);
  }

  // formatting modes share one class, configured by name
  $formats = array('strong','emphasis','underline','monospace',
                   'subscript','superscript','deleted');
  foreach($formats as $name){
    $modeObj = new Doku_Parser_Mode_formatting($name);
    $modes[] = array('sort' => $modeObj->getSort(), 'mode' => $name, 'obj' => $modeObj);
  }

  // modes which need files: mode name => function returning its table
  $tables = array(
    'smiley'  => 'getSmileys',
    'acronym' => 'getAcronyms',
    'entity'  => 'getEntities'
  );
  foreach($tables as $name => $loader){
    $class   = "Doku_Parser_Mode_$name";
    $modeObj = new $class(array_keys($loader()));
    $modes[] = array('sort' => $modeObj->getSort(), 'mode' => $name, 'obj' => $modeObj);
  }

  // optional camelcase mode
  if($conf['camelcase']){
    $modeObj = new Doku_Parser_Mode_camelcaselink();
    $modes[] = array('sort' => $modeObj->getSort(), 'mode' => 'camelcaselink', 'obj' => $modeObj);
  }

  //sort modes
  usort($modes,'p_sort_modes');

  return $modes;
}
529
/**
 * Callback function for usort
 *
 * Compares two mode entries by their 'sort' value.
 *
 * @param array $a  first entry, must have a 'sort' key
 * @param array $b  second entry, must have a 'sort' key
 * @return int 0 if both sort values are equal, -1 if $a sorts first, 1 otherwise
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_sort_modes($a, $b){
  $left  = $a['sort'];
  $right = $b['sort'];

  return ($left == $right) ? 0 : (($left < $right) ? -1 : 1);
}
539
/**
 * Renders a list of instructions to the specified output mode
 *
 * The renderer for $mode is reset, given the smiley, entity, acronym
 * and interwiki tables, and then fed every instruction as a method
 * call. Afterwards the RENDERER_CONTENT_POSTPROCESS event is fired
 * with the mode and a reference to the rendered document.
 *
 * @param string $mode          output mode (renderer name)
 * @param array  $instructions  list of instructions: method name at index 0,
 *                              argument list at index 1
 * @param array  $info          (reference) receives the renderer's info array
 * @return string the rendered document; '' if $instructions is null,
 *                null if no renderer is available for $mode
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_render($mode,$instructions,&$info){
  if(is_null($instructions)) return '';

  $renderer =& p_get_renderer($mode);
  if (is_null($renderer)) return null;

  $renderer->reset();

  // hand the lookup tables to the renderer
  $renderer->smileys   = getSmileys();
  $renderer->entities  = getEntities();
  $renderer->acronyms  = getAcronyms();
  $renderer->interwiki = getInterwiki();

  // every instruction is a method call on the renderer
  foreach ( $instructions as $call ) {
    $callback = array(&$renderer, $call[0]);
    call_user_func_array($callback, $call[1]);
  }

  //set info array
  $info = $renderer->info;

  // let event handlers post process the document, then return it
  $data = array($mode,& $renderer->doc);
  trigger_event('RENDERER_CONTENT_POSTPROCESS',$data);

  return $renderer->doc;
}
576
/**
 * Returns a renderer object for the given output mode
 *
 * The renderer name is taken from $conf['renderer_'.$mode] if that is
 * set, else it is the mode itself. A renderer shipped as
 * inc/parser/<name>.php (class Doku_Renderer_<name>) is tried first,
 * then a renderer plugin of that name.
 *
 * @param string $mode  output mode
 * @return object|null the renderer (by reference), or null if none was
 *                     found; a message is shown via msg() in that case
 */
function & p_get_renderer($mode) {
  global $conf;

  // this function returns by reference, so failures have to return a
  // variable holding null - not a bare null
  $Renderer = null;

  $rname = !empty($conf['renderer_'.$mode]) ? $conf['renderer_'.$mode] : $mode;

  // try default renderer first:
  $file = DOKU_INC."inc/parser/$rname.php";
  if(@file_exists($file)){
    require_once $file;
    $rclass = "Doku_Renderer_$rname";

    if ( !class_exists($rclass) ) {
      trigger_error("Unable to resolve render class $rclass",E_USER_WARNING);
      msg("Renderer '$rname' for $mode not valid",-1);
      return $Renderer;
    }

    // plain assignment: "= & new" is deprecated since PHP 5.3 and a
    // parse error from PHP 7 on
    $Renderer = new $rclass();
  }else{
    // Maybe a plugin is available?
    $Renderer =& plugin_load('renderer',$rname, true);
    if(is_null($Renderer)){
      msg("No renderer '$rname' found for mode '$mode'",-1);
    }
  }

  return $Renderer;
}
605
/**
 * Gets the first heading of a page, i.e. its 'title' metadata
 *
 * @param   string   $id       dokuwiki page id
 * @param   bool     $render   rerender if first heading not known
 *                             default: true  -- must be set to false for calls from the metadata renderer to
 *                                               protect against loops and excessive resource usage when pages
 *                                               for which only a first heading is required will attempt to
 *                                               render metadata for all the pages for which they require first
 *                                               headings ... and so on.
 * @return  mixed    the value p_get_metadata() returns for the 'title' key
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_get_first_heading($id, $render=true){
  $heading = p_get_metadata($id,'title',$render);
  return $heading;
}
622
/**
 * Wrapper for GeSHi Code Highlighter, provides caching of its output
 *
 * The highlighted code is cached in a file named after language and
 * code. The cache is used unless a purge was requested or the cache
 * file is not newer than inc/geshi.php, the GeSHi language file and
 * the dokuwiki.php configuration file.
 *
 * @param  string   $code       source code to be highlighted
 * @param  string   $language   language to provide highlighting
 * @param  string   $wrapper    html element to wrap the returned highlighted text;
 *                              an empty value returns the text unwrapped
 * @return string   the highlighted code
 *
 * @author Christopher Smith <chris@jalakai.co.uk>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_xhtml_cached_geshi($code, $language, $wrapper='pre') {
  global $conf;
  $language = strtolower($language);

  // remove any leading or trailing blank lines
  $code = preg_replace('/^\s*?\n|\s*?\n$/','',$code);

  $cache = getCacheName($language.$code,".code");
  $ctime = @filemtime($cache);

  // empty() instead of a plain read: 'purge' is usually not part of the request
  if($ctime && empty($_REQUEST['purge']) &&
     $ctime > filemtime(DOKU_INC.'inc/geshi.php') &&
     $ctime > @filemtime(DOKU_INC.'inc/geshi/'.$language.'.php') &&
     $ctime > filemtime(DOKU_CONF.'dokuwiki.php')){
    $highlighted_code = io_readFile($cache, false);

  } else {

    require_once(DOKU_INC . 'inc/geshi.php');

    $geshi = new GeSHi($code, $language, DOKU_INC . 'inc/geshi');
    $geshi->set_encoding('utf-8');
    $geshi->enable_classes();
    $geshi->set_header_type(GESHI_HEADER_PRE);
    $geshi->set_link_target($conf['target']['extern']);

    // remove GeSHi's wrapper element (we'll replace it with our own later)
    // we need to use a GeSHi wrapper to avoid <BR> throughout the highlighted text
    $highlighted_code = preg_replace('!^<pre[^>]*>|</pre>$!','',$geshi->parse_code());
    io_saveFile($cache,$highlighted_code);
  }

  // without a wrapper the highlighted text is returned as is
  if (!$wrapper) {
    return $highlighted_code;
  }

  // the language name ends up inside an attribute value, so escape it;
  // names made of plain letters and digits come out unchanged
  // NOTE(review): presumably $language originates from wiki markup - confirm against the callers
  $class = htmlspecialchars($language, ENT_QUOTES, 'UTF-8');

  return "<$wrapper class=\"code $class\">$highlighted_code</$wrapper>";
}
671
672//Setup VIM: ex: et ts=2 enc=utf-8 :
673