xref: /dokuwiki/inc/parserutils.php (revision e3f5b6f8aa3a3c13c846aa9ad7e0a1e1c0d59344)
1<?php
2/**
3 * Utilities for accessing the parser
4 *
5 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
6 * @author     Harry Fuecks <hfuecks@gmail.com>
7 * @author     Andreas Gohr <andi@splitbrain.org>
8 */
9
10  if(!defined('DOKU_INC')) define('DOKU_INC',fullpath(dirname(__FILE__).'/../').'/');
11
12  require_once(DOKU_INC.'inc/confutils.php');
13  require_once(DOKU_INC.'inc/pageutils.php');
14  require_once(DOKU_INC.'inc/pluginutils.php');
15  require_once(DOKU_INC.'inc/cache.php');
16
/**
 * Renders a wiki page, optionally at a given revision, to XHTML.
 *
 * The current page goes through p_cached_output(); an old revision is
 * always parsed and rendered directly. When the page file does not exist
 * and $excuse is true, the 'norev' (revision requested) or 'newpage'
 * (no revision requested) locale text is returned instead.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id     wiki page id
 * @param  string $rev    revision to render; empty for the current page
 * @param  bool   $excuse return an explanation when the file wasn't found
 * @return string rendered output; '' when the file is missing and
 *                $excuse is false
 */
function p_wiki_xhtml($id, $rev='', $excuse=true){
  global $ID;

  $file = wikiFN($id,$rev);

  // parsing needs $id in the global $ID - swap it in for the duration of the call
  $keep = $ID;
  $ID   = $id;

  $ret = '';
  if(@file_exists($file)){
    if($rev){
      // no caching on old revisions
      $ins = p_get_instructions(io_readWikiPage($file,$id,$rev));
      $ret = p_render('xhtml',$ins,$info);
    }else{
      $ret = p_cached_output($file,'xhtml',$id);
    }
  }elseif($excuse){
    $ret = p_locale_xhtml($rev ? 'norev' : 'newpage');
  }

  // restore ID (just in case)
  $ID = $keep;

  return $ret;
}
53
/**
 * Returns starting summary for a page (e.g. the first few
 * paragraphs), marked up in XHTML.
 *
 * If the page file wasn't found and $excuse is true, the 'norev' or
 * 'newpage' locale text is returned and $title is left untouched.
 * If the file wasn't found and $excuse is false, nothing is rendered:
 * the result of p_render() for null instructions is returned and
 * $title is set to $id.
 *
 * @deprecated
 * @author Harry Fuecks <hfuecks@gmail.com>
 *
 * @param  string $id     wiki page id
 * @param  string $title  (reference) populated with the renderer's
 *                        'sum_pagetitle' info value, or with $id if that is empty
 * @param  string $rev    revision to summarize; empty for the current page
 * @param  bool   $excuse return an explanation when the file wasn't found
 * @return string rendered summary or explanation text
 */
function p_wiki_xhtml_summary($id, &$title, $rev='', $excuse=true){
  global $ID;

  $file = wikiFN($id,$rev);

  // Both are defined up front: without a file and without an excuse no
  // instructions get loaded, and p_render() returns before filling $info.
  $ins  = null;
  $info = array();

  // parsing needs $id in the global $ID - swap it in for the duration of the call
  $keep = $ID;
  $ID   = $id;

  if(@file_exists($file)){
    if($rev){
      // no caching on old revisions
      $ins = p_get_instructions(io_readWikiPage($file,$id,$rev));
    }else{
      // The XHTML for a summary is not cached so use the instruction cache
      $ins = p_cached_instructions($file);
    }
  }elseif($excuse){
    $ret = p_locale_xhtml($rev ? 'norev' : 'newpage');
    // restore ID (just in case)
    $ID = $keep;
    return $ret;
  }

  $ret = p_render('xhtmlsummary',$ins,$info);

  $title = !empty($info['sum_pagetitle']) ? $info['sum_pagetitle'] : $id;

  // restore ID (just in case)
  $ID = $keep;
  return $ret;
}
110
/**
 * Returns the locale text with the given id in parsed format
 *
 * The text file is resolved with localeFN() and rendered through
 * p_cached_output() using its default output format.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $id  id of the locale text
 * @return string parsed locale text
 */
function p_locale_xhtml($id){
  return p_cached_output(localeFN($id));
}
121
/**
 *     *** DEPRECATED ***
 *
 * use p_cached_output()
 *
 * Returns the given file parsed to XHTML. This is a plain alias that
 * forwards to p_cached_output() with its default arguments.
 *
 * @deprecated
 * @author Andreas Gohr <andi@splitbrain.org>
 * @todo   rewrite to use mode instead of hardcoded XHTML
 *
 * @param  string $file  path of the file to parse
 * @return string parsed output
 */
function p_cached_xhtml($file){
  $xhtml = p_cached_output($file);
  return $xhtml;
}
138
/**
 * Returns the given file parsed into the requested output format
 *
 * A usable render cache is returned as is. Otherwise the file's (cached)
 * instructions are rendered; the result is stored in the cache when the
 * renderer's info array has a true 'cache' entry, and the cache file is
 * removed when it has not. With $conf['allowdebug'] set and XHTML output
 * an HTML comment describing the cache handling is appended.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param  string $file    path of the file to parse
 * @param  string $format  output format (renderer mode)
 * @param  string $id      page id, passed on to the cache objects
 * @return string parsed output
 */
function p_cached_output($file, $format='xhtml', $id='') {
  global $conf;

  $cache = new cache_renderer($id, $file, $format);

  // remember what happened to the cache so the debug note can be added in one place
  if ($cache->useCache()) {
    $parsed = $cache->retrieveCache(false);
    $state  = 'used';
  } else {
    $parsed = p_render($format, p_cached_instructions($file,false,$id), $info);

    if ($info['cache']) {
      $cache->storeCache($parsed);               //save cachefile
      $state = 'created';
    } else {
      $cache->removeCache();                     //try to delete cachefile
      $state = 'forbidden';
    }
  }

  if ($conf['allowdebug'] && $format=='xhtml') {
    if ($state == 'used') {
      $parsed .= "\n<!-- cachefile {$cache->cache} used -->\n";
    } elseif ($state == 'created') {
      $parsed .= "\n<!-- no cachefile used, but created {$cache->cache} -->\n";
    } else {
      $parsed .= "\n<!-- no cachefile used, caching forbidden -->\n";
    }
  }

  return $parsed;
}
166
/**
 * Returns the render instructions for a file
 *
 * Uses and creates a serialized cache file. The cache is read when
 * $cacheonly is set, when the cache object reports itself usable, or when
 * the instructions for $file were already rebuilt and stored earlier in
 * this run. Otherwise the file is parsed and the result is stored.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $file       path of the wiki file
 * @param  bool   $cacheonly  only consult the cache, never parse
 * @param  string $id         page id, passed on to the cache and the file reader
 * @return mixed  the instructions (cached or freshly parsed), or null when
 *                there is no usable cache and the file does not exist
 */
function p_cached_instructions($file,$cacheonly=false,$id='') {
  // files whose instructions were rebuilt and stored during this run
  static $done = array();

  $cache = new cache_instructions($id, $file);

  if ($cacheonly || $cache->useCache() || isset($done[$file])) {
    return $cache->retrieveCache();
  }

  if (!@file_exists($file)) {
    return null;
  }

  // no cache - do some work
  $ins = p_get_instructions(io_readWikiPage($file,$id));
  if ($cache->storeCache($ins)) {
    $done[$file] = true; // we won't rebuild these instructions in the same run again
  } else {
    msg('Unable to save cache file. Hint: disk full; file permissions; safe_mode setting.',-1);
  }

  return $ins;
}
196
/**
 * turns a page into a list of instructions
 *
 * Builds a parser with a fresh handler, registers every mode returned by
 * p_get_parsermodes() and parses the given text. The
 * PARSER_WIKITEXT_PREPROCESS event is triggered with the text right
 * before parsing.
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $text  raw wiki text
 * @return mixed  whatever Doku_Parser::parse() returns (the instruction list)
 */
function p_get_instructions($text){

  $modes = p_get_parsermodes();

  // Create the parser and its handler. Plain "new" is used because
  // assigning "new" by reference ("=& new") is rejected by PHP 7 and later.
  $Parser = new Doku_Parser();
  $Parser->Handler = new Doku_Handler();

  //add modes to parser
  foreach($modes as $mode){
    $Parser->addMode($mode['mode'],$mode['obj']);
  }

  // Do the parsing
  // NOTE(review): handlers of this event presumably may alter $text - confirm against trigger_event()
  trigger_event('PARSER_WIKITEXT_PREPROCESS', $text);
  return $Parser->parse($text);
}
224
/**
 * returns the metadata of a page
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 *
 * @param  string       $id      wiki page id
 * @param  string|false $key     metadata key to return; a second level can be
 *                               addressed as "key subkey" (separated by one space);
 *                               false returns the whole 'current' metadata array
 * @param  bool         $render  render the metadata first if the page exists and
 *                               no description abstract is stored yet
 * @return mixed  the requested value, null if the key is not set, or the
 *                complete 'current' metadata array when no key is given
 */
function p_get_metadata($id, $key=false, $render=false){
  global $ID, $INFO, $cache_metadata;

  // cache the current page
  // Benchmarking shows the current page's metadata is generally the only page metadata
  // accessed several times. This may catch a few other pages, but that shouldn't be an issue.
  $cache = ($ID == $id);
  $meta = p_read_metadata($id, $cache);

  // metadata has never been rendered before - do it! (but not for non-existent pages)
  if ($render && empty($meta['current']['description']['abstract']) && page_exists($id)){
    $rendered = p_render_metadata($id, $meta);

    // p_render_metadata() returns null when no instructions could be loaded;
    // only a real result may replace and overwrite the stored metadata
    if (is_array($rendered)) {
      $meta = $rendered;
      io_saveFile(metaFN($id, '.meta'), serialize($meta));

      // sync cached copies, including $INFO metadata
      if (!empty($cache_metadata[$id])) $cache_metadata[$id] = $meta;
      if (!empty($INFO) && ($id == $INFO['id'])) { $INFO['meta'] = $meta['current']; }
    }
  }

  // filter by $key
  if ($key){
    // a key without a subkey yields a single part only
    $parts  = explode(' ', $key, 2);
    $key    = $parts[0];
    $subkey = isset($parts[1]) ? trim($parts[1]) : '';

    if ($subkey) {
      return isset($meta['current'][$key][$subkey]) ? $meta['current'][$key][$subkey] : null;
    }

    return isset($meta['current'][$key]) ? $meta['current'][$key] : null;
  }

  return $meta['current'];
}
263
/**
 * sets metadata elements of a page
 *
 * Values are always written to the 'current' section and, when
 * $persistent is true, to the 'persistent' section as well. The
 * sub-arrays of 'relation' and the keys 'description', 'date' and
 * 'contributor' are merged into existing values instead of replacing
 * them. The metadata file is only written if something changed.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 *
 * @param  string $id          wiki page id
 * @param  array  $data        metadata to set, keyed by metadata key
 * @param  bool   $render      render the page's metadata before applying $data
 * @param  bool   $persistent  also store the values in the 'persistent' section
 * @return bool   false if $data is not an array, true if nothing changed,
 *                otherwise the result of io_saveFile()
 */
function p_set_metadata($id, $data, $render=false, $persistent=true){
  if (!is_array($data)) return false;

  global $ID;

  // cache the current page
  $orig = p_read_metadata($id, ($ID == $id));

  // render metadata first?
  if ($render) {
    $meta = p_render_metadata($id, $orig);
  } else {
    $meta = $orig;
  }

  // keys that must have subkeys and are merged rather than replaced
  $protected = array('description', 'date', 'contributor');

  // sections to write to: 'current' always, 'persistent' on request
  $sections = $persistent ? array('current', 'persistent') : array('current');

  // now add the passed metadata
  foreach ($data as $key => $value){

    if ($key == 'relation'){
      // be careful with sub-arrays of $meta['relation']
      foreach ($value as $subkey => $subvalue){
        foreach ($sections as $section){
          if (!empty($meta[$section][$key][$subkey])) {
            $meta[$section][$key][$subkey] = array_merge($meta[$section][$key][$subkey], $subvalue);
          } else {
            $meta[$section][$key][$subkey] = $subvalue;
          }
        }
      }

    } elseif (in_array($key, $protected)){
      // be careful with some sensitive arrays of $meta:
      // these keys must have subkeys - a legitimate value must be an array
      if (is_array($value)) {
        foreach ($sections as $section){
          if (!empty($meta[$section][$key])) {
            $meta[$section][$key] = array_merge($meta[$section][$key],$value);
          } else {
            $meta[$section][$key] = $value;
          }
        }
      }

    } else {
      // no special treatment for the rest
      foreach ($sections as $section){
        $meta[$section][$key] = $value;
      }
    }
  }

  // save only if metadata changed
  if ($meta == $orig) return true;

  // sync cached copies, including $INFO metadata
  global $cache_metadata, $INFO;

  if (!empty($cache_metadata[$id])) $cache_metadata[$id] = $meta;
  if (!empty($INFO) && ($id == $INFO['id'])) { $INFO['meta'] = $meta['current']; }

  return io_saveFile(metaFN($id, '.meta'), serialize($meta));
}
324
/**
 * Purges the non-persistent part of the meta data
 * used on page deletion
 *
 * Every entry of the 'current' section is reset: array values become
 * empty arrays, everything else becomes an empty string. The
 * 'persistent' section is written back unchanged.
 *
 * @author Michael Klier <chi@chimeric.de>
 *
 * @param  string $id  wiki page id
 * @return mixed  result of io_saveFile()
 */
function p_purge_metadata($id) {
    global $cache_metadata;

    $meta = p_read_metadata($id);

    if (isset($meta['current']) && is_array($meta['current'])) {
        foreach($meta['current'] as $key => $value) {
            // decide on the entry's own value, not on a top-level $meta key
            $meta['current'][$key] = is_array($value) ? array() : '';
        }
    }

    // keep the in-memory copy used by p_read_metadata() in sync with the file
    if (!empty($cache_metadata[$id])) $cache_metadata[$id] = $meta;

    return io_saveFile(metaFN($id, '.meta'), serialize($meta));
}
343
/**
 * read the metadata from source/cache for $id
 * (internal use only - called by p_get_metadata & p_set_metadata)
 *
 * this function also converts the metadata from the original format to
 * the current format ('current' & 'persistent' arrays) and saves the
 * converted data back to the metadata file
 *
 * A missing metadata file, or one whose content does not unserialize to
 * an array, yields empty 'current' and 'persistent' arrays.
 *
 * @author   Christopher Smith <chris@jalakai.co.uk>
 *
 * @param    string   $id      absolute wiki page id
 * @param    bool     $cache   whether or not to cache metadata in memory
 *                             (only use for metadata likely to be accessed several times)
 *
 * @return   array             metadata
 */
function p_read_metadata($id,$cache=false) {
  global $cache_metadata;

  if (isset($cache_metadata[$id])) return $cache_metadata[$id];

  $file = metaFN($id, '.meta');
  $meta = @file_exists($file) ? unserialize(io_readFile($file, false)) : false;

  // unserialize() does not return an array for empty or damaged content -
  // start from a clean structure instead of wrapping (and saving) that value
  if (!is_array($meta)) {
    $meta = array('current'=>array(),'persistent'=>array());
  }

  // convert $meta from old format to new (current+persistent) format
  if (!isset($meta['current'])) {
    $meta = array('current'=>$meta,'persistent'=>$meta);

    // remove non-persistent keys
    unset($meta['persistent']['title']);
    unset($meta['persistent']['description']['abstract']);
    unset($meta['persistent']['description']['tableofcontents']);
    unset($meta['persistent']['relation']['haspart']);
    unset($meta['persistent']['relation']['references']);
    unset($meta['persistent']['date']['valid']);

    // drop containers that became empty
    if (empty($meta['persistent']['description'])) unset($meta['persistent']['description']);
    if (empty($meta['persistent']['relation'])) unset($meta['persistent']['relation']);
    if (empty($meta['persistent']['date'])) unset($meta['persistent']['date']);

    // save converted metadata
    io_saveFile($file, serialize($meta));
  }

  if ($cache) {
    $cache_metadata[$id] = $meta;
  }

  return $meta;
}
393
/**
 * renders the metadata of a page
 *
 * The rendering is wrapped in a PARSER_METADATA_RENDER event whose data
 * is $orig plus a 'page' entry holding $id. Unless the event's
 * advise_before() returns false, the page's instructions are run against
 * a Doku_Renderer_metadata object that starts from the existing
 * 'current' and 'persistent' arrays.
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 *
 * @param  string $id    wiki page id
 * @param  array  $orig  existing metadata ('current' and 'persistent' arrays)
 * @return mixed  the event result - array('current'=>..., 'persistent'=>...)
 *                when the default rendering ran - or null when no
 *                instructions could be loaded for the page
 */
function p_render_metadata($id, $orig){
  // make sure the correct ID is in global ID
  global $ID;
  $keep = $ID;
  $ID   = $id;

  // add an extra key for the event - to tell event handlers the page whose metadata this is
  $orig['page'] = $id;
  $evt = new Doku_Event('PARSER_METADATA_RENDER', $orig);
  if ($evt->advise_before()) {

    require_once DOKU_INC."inc/parser/metadata.php";

    // get instructions
    $instructions = p_cached_instructions(wikiFN($id),false,$id);
    if(is_null($instructions)){
      $ID = $keep;
      return null; // something went wrong with the instructions
    }

    // set up the renderer; plain "new" because "=& new" is rejected by PHP 7 and later
    $renderer = new Doku_Renderer_metadata();
    $renderer->meta = $orig['current'];
    $renderer->persistent = $orig['persistent'];

    // loop through the instructions
    foreach ($instructions as $instruction){
      // execute the callback against the renderer:
      // element 0 is the method name, element 1 its argument list
      call_user_func_array(array(&$renderer, $instruction[0]), $instruction[1]);
    }

    $evt->result = array('current'=>$renderer->meta,'persistent'=>$renderer->persistent);
  }
  $evt->advise_after();

  $ID = $keep;
  return $evt->result;
}
438
/**
 * returns all available parser syntax modes in correct order
 *
 * The list is assembled from the syntax plugins, the standard modes, the
 * formatting modes, the smiley/acronym/entity modes and - depending on
 * $conf['camelcase'] - the camelcase link mode, and is then sorted with
 * p_sort_modes(). The result is kept in a static variable and reused on
 * later calls.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @return array  list of array('sort'=>..., 'mode'=>..., 'obj'=>...) entries
 */
function p_get_parsermodes(){
  global $conf;

  //reuse old data
  static $modes = null;
  if($modes != null){
    return $modes;
  }

  //import parser classes and mode definitions
  require_once DOKU_INC . 'inc/parser/parser.php';

  // we now collect all syntax modes and their objects, then they will
  // be sorted and added to the parser in correct order
  $modes = array();

  // add syntax plugins
  $pluginlist = plugin_list('syntax');
  if(count($pluginlist)){
    global $PARSER_MODES;
    $obj = null;
    foreach($pluginlist as $plugin){
      //attempt to load plugin into $obj
      if(!$obj =& plugin_load('syntax',$plugin)) continue;

      $name = "plugin_$plugin";
      $PARSER_MODES[$obj->getType()][] = $name; //register mode type

      //add to modes
      $modes[] = array('sort' => $obj->getSort(), 'mode' => $name, 'obj' => $obj);

      unset($obj); //remove the reference
    }
  }

  // add default modes
  $std_modes = array('listblock','preformatted','notoc','nocache',
                     'header','table','linebreak','footnote','hr',
                     'unformatted','php','html','code','file','quote',
                     'internallink','rss','media','externallink',
                     'emaillink','windowssharelink','eol');
  if($conf['typography']){
    array_push($std_modes, 'quotes', 'multiplyentity');
  }
  foreach($std_modes as $name){
    $class = "Doku_Parser_Mode_$name";
    $obj   = new $class();
    $modes[] = array('sort' => $obj->getSort(), 'mode' => $name, 'obj' => $obj);
  }

  // add formatting modes
  $fmt_modes = array('strong','emphasis','underline','monospace',
                     'subscript','superscript','deleted');
  foreach($fmt_modes as $name){
    $obj   = new Doku_Parser_Mode_formatting($name);
    $modes[] = array('sort' => $obj->getSort(), 'mode' => $name, 'obj' => $obj);
  }

  // add modes which need files
  $obj     = new Doku_Parser_Mode_smiley(array_keys(getSmileys()));
  $modes[] = array('sort' => $obj->getSort(), 'mode' => 'smiley', 'obj' => $obj);

  $obj     = new Doku_Parser_Mode_acronym(array_keys(getAcronyms()));
  $modes[] = array('sort' => $obj->getSort(), 'mode' => 'acronym', 'obj' => $obj);

  $obj     = new Doku_Parser_Mode_entity(array_keys(getEntities()));
  $modes[] = array('sort' => $obj->getSort(), 'mode' => 'entity', 'obj' => $obj);

  // add optional camelcase mode
  if($conf['camelcase']){
    $obj     = new Doku_Parser_Mode_camelcaselink();
    $modes[] = array('sort' => $obj->getSort(), 'mode' => 'camelcaselink', 'obj' => $obj);
  }

  //sort modes
  usort($modes,'p_sort_modes');

  return $modes;
}
530
/**
 * Callback function for usort
 *
 * Orders two mode entries ascending by their 'sort' value.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  array $a  mode entry with a 'sort' key
 * @param  array $b  mode entry with a 'sort' key
 * @return int   0 if both sort values are equal, -1 if $a sorts first, 1 otherwise
 */
function p_sort_modes($a, $b){
  $left  = $a['sort'];
  $right = $b['sort'];

  if($left == $right){
    return 0;
  }
  if($left < $right){
    return -1;
  }
  return 1;
}
540
/**
 * Renders a list of instructions to the specified output mode
 *
 * The renderer for $mode is reset, supplied with the smiley, entity,
 * acronym and interwiki tables, and then fed every instruction. After
 * rendering, the RENDERER_CONTENT_POSTPROCESS event is triggered with
 * the mode and a reference to the renderer's document.
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $mode          output mode (renderer name)
 * @param  array  $instructions  instruction list; each entry holds the renderer
 *                               method name at index 0 and its arguments at index 1
 * @param  array  $info          (reference) receives the renderer's info array;
 *                               left untouched when the function returns early
 * @return string|null  the rendered document, '' when $instructions is null,
 *                      null when no renderer could be loaded
 */
function p_render($mode,$instructions,&$info){
  if(is_null($instructions)) return '';

  $Renderer =& p_get_renderer($mode);
  if (is_null($Renderer)) return null;

  $Renderer->reset();

  // hand the lookup tables to the renderer
  $Renderer->smileys   = getSmileys();
  $Renderer->entities  = getEntities();
  $Renderer->acronyms  = getAcronyms();
  $Renderer->interwiki = getInterwiki();

  // Execute each instruction's callback against the Renderer
  foreach ( $instructions as $call ) {
      $method = $call[0];
      $args   = $call[1];
      call_user_func_array(array(&$Renderer, $method),$args);
  }

  //set info array
  $info = $Renderer->info;

  // Post process and return the output; the document is passed by
  // reference inside the event data
  $data = array($mode,& $Renderer->doc);
  trigger_event('RENDERER_CONTENT_POSTPROCESS',$data);

  return $Renderer->doc;
}
577
/**
 * Loads the renderer object for the given output mode
 *
 * The renderer name is $conf['renderer_<mode>'] when that is set and
 * non-empty, otherwise the mode itself. A bundled renderer file
 * inc/parser/<name>.php is tried first; if it does not exist a renderer
 * plugin with that name is loaded instead. Failures are reported
 * through msg().
 *
 * @param  string $mode  output mode
 * @return object|null   (by reference) the renderer, or null if none could be loaded
 */
function & p_get_renderer($mode) {
  global $conf;

  // The function returns by reference, so failure paths have to return a
  // variable rather than the literal null.
  $none = null;

  $rname = !empty($conf['renderer_'.$mode]) ? $conf['renderer_'.$mode] : $mode;

  // try default renderer first:
  $file = DOKU_INC."inc/parser/$rname.php";
  if(@file_exists($file)){
    require_once $file;
    $rclass = "Doku_Renderer_$rname";

    if ( !class_exists($rclass) ) {
      trigger_error("Unable to resolve render class $rclass",E_USER_WARNING);
      msg("Renderer '$rname' for $mode not valid",-1);
      return $none;
    }

    // plain "new": assigning "new" by reference is rejected by PHP 7 and later
    $Renderer = new $rclass();
  }else{
    // Maybe a plugin is available?
    $Renderer =& plugin_load('renderer',$rname);
    if(is_null($Renderer)){
      msg("No renderer '$rname' found for mode '$mode'",-1);
      return $none;
    }
  }

  return $Renderer;
}
606
/**
 * Gets the first heading from a file
 *
 * The heading is read from the page's 'title' metadata and is only
 * looked up when $conf['useheading'] is enabled.
 *
 * @param   string   $id       dokuwiki page id
 * @param   bool     $render   rerender if first heading not known
 *                             default: true  -- must be set to false for calls from the metadata renderer to
 *                                               protect against loops and excessive resource usage when pages
 *                                               for which only a first heading is required will attempt to
 *                                               render metadata for all the pages for which they require first
 *                                               headings ... and so on.
 * @return  mixed    the 'title' metadata value, or null when
 *                   $conf['useheading'] is off or no title is set
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_get_first_heading($id, $render=true){
  global $conf;

  if(!$conf['useheading']){
    return null;
  }

  return p_get_metadata($id,'title',$render);
}
624
/**
 * Wrapper for GeSHi Code Highlighter, provides caching of its output
 *
 * The highlighted code is cached under a name derived from language and
 * code. The cache is reused unless a 'purge' request parameter is set or
 * the cache file is not newer than inc/geshi.php, the GeSHi language
 * file and the dokuwiki.php configuration file.
 *
 * @param  string   $code       source code to be highlighted
 * @param  string   $language   language to provide highlighting
 * @param  string   $wrapper    html element to wrap the returned highlighted text;
 *                              an empty value returns the bare highlighted code
 * @return string   highlighted code, wrapped in <$wrapper class="code LANGUAGE">
 *                  when a wrapper is given
 *
 * @author Christopher Smith <chris@jalakai.co.uk>
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function p_xhtml_cached_geshi($code, $language, $wrapper='pre') {
  global $conf;
  $language = strtolower($language);

  // remove any leading or trailing blank lines
  $code = preg_replace('/^\s*?\n|\s*?\n$/','',$code);

  $cache = getCacheName($language.$code,".code");
  $ctime = @filemtime($cache);

  // empty() instead of a plain read: the 'purge' parameter is usually not sent at all
  if($ctime && empty($_REQUEST['purge']) &&
     $ctime > filemtime(DOKU_INC.'inc/geshi.php') &&
     $ctime > @filemtime(DOKU_INC.'inc/geshi/'.$language.'.php') &&
     $ctime > filemtime(DOKU_CONF.'dokuwiki.php')){
    $highlighted_code = io_readFile($cache, false);

  } else {

    require_once(DOKU_INC . 'inc/geshi.php');

    $geshi = new GeSHi($code, $language, DOKU_INC . 'inc/geshi');
    $geshi->set_encoding('utf-8');
    $geshi->enable_classes();
    $geshi->set_header_type(GESHI_HEADER_PRE);
    $geshi->set_link_target($conf['target']['extern']);

    // remove GeSHi's wrapper element (we'll replace it with our own later)
    // we need to use a GeSHi wrapper to avoid <BR> throughout the highlighted text
    $highlighted_code = preg_replace('!^<pre[^>]*>|</pre>$!','',$geshi->parse_code());
    io_saveFile($cache,$highlighted_code);
  }

  // add a wrapper element if required
  if ($wrapper) {
    // NOTE(review): $language presumably originates from wiki markup; it is
    // escaped so it cannot break out of the class attribute - confirm against callers
    $class = htmlspecialchars($language, ENT_QUOTES, 'UTF-8');
    return "<$wrapper class=\"code $class\">$highlighted_code</$wrapper>";
  } else {
    return $highlighted_code;
  }
}
673
674//Setup VIM: ex: et ts=2 enc=utf-8 :
675