2if (! class_exists('syntax_plugin_code')) {
3	if (! defined('DOKU_PLUGIN')) {
4		if (! defined('DOKU_INC')) {
5			define('DOKU_INC',
6				realpath(dirname(__FILE__) . '/../../') . '/');
7		} // if
8		define('DOKU_PLUGIN', DOKU_INC . 'lib/plugins/');
9	} // if
10	// Include parent class:
11	require_once(DOKU_PLUGIN . 'syntax.php');
12	// We're dealing with "GeSHi" here, hence include it:
13	require_once(DOKU_INC . 'inc/geshi.php');
16 * <tt>syntax_plugin_code.php </tt>- A PHP4 class that implements the
17 * <tt>DokuWiki</tt> plugin for <tt>highlighting</tt> code fragments.
18 *
19 * <p>
20 * Usage:<br>
21 * <tt>&#60;code [language startno |[fh] text |[hs]]&#62;...&#60;/code&#62;</tt>
22 * </p><pre>
23 *	Copyright (C) 2006, 2008  M.Watermann, D-10247 Berlin, FRG
24 *			All rights reserved
25 *		EMail : &lt;support@mwat.de&gt;
26 * </pre><div class="disclaimer">
27 * This program is free software; you can redistribute it and/or modify
28 * it under the terms of the GNU General Public License as published by
29 * the Free Software Foundation; either
30 * <a href="http://www.gnu.org/licenses/gpl.html">version 3</a> of the
31 * License, or (at your option) any later version.<br>
32 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
36 * </div>
37 * @author <a href="mailto:support@mwat.de">Matthias Watermann</a>
38 * @version <tt>$Id: syntax_plugin_code.php,v 1.29 2008/07/22 09:22:47 matthias Exp $</tt>
39 * @since created 24-Dec-2006
40 */
41class syntax_plugin_code extends DokuWiki_Syntax_Plugin {
43	/**
44	 * @privatesection
45	 */
46	//@{
48	/**
49	 * Additional markup used with older DokuWiki installations.
50	 *
51	 * @private
52	 * @see _fixJS()
53	 */
54	var $_JSmarkup = FALSE;
56	/**
57	 * Indention "text" used by <tt>_addLines()</tt>.
58	 *
59	 * <p>
60	 * Note that we're using raw <em>UTF-8 NonBreakable Spaces</em> here.
61	 * </p>
62	 * @private
63	 * @see _addLines()
64	 */
65	var $_lead = array('', ' ', '  ', '   ', '    ',
66		'     ', '      ', '       ');
69	/**
70	 * Section counter for ODT export
71	 *
72	 * @private
73	 * @see render()
74	 * @since created 08-Jun-2008
75	 */
76	var $_odtSect = 0;
78	/**
79	 * Prepare the markup to render the DIFF text.
80	 *
81	 * @param $aText String The DIFF text to markup.
82	 * @param $aFormat String The DIFF format used ("u", "c", "n|r", "s").
83	 * @param $aDoc String Reference to the current renderer's
84	 * <tt>doc</tt> property.
85	 * @return Boolean <tt>TRUE</tt>.
86	 * @private
87	 * @see render()
88	 */
89	function _addDiff(&$aText, &$aFormat, &$aDoc) {
90		// Since we're inside a PRE block we need the leading LFs:
91		$ADD = "\n" . '<span class="diff_addedline">';
92		$DEL = "\n" . '<span class="diff_deletedline">';
93		$HEAD = "\n" . '<span class="diff_blockheader">';
94		$CLOSE = '</span>';
95		// Common headers for all formats;
96		// the RegEx needs at least ")#" appended!
97		$DiffHead = '#\n((?:diff\s[^\n]*)|(?:Index:\s[^\n]*)|(?:={60,})'
98			. '|(?:RCS file:\s[^\n]*)|(?:retrieving revision [0-9][^\n]*)';
99		switch ($aFormat) {
100			case 'u':	// unified output
101				$aDoc .= preg_replace(
102					array($DiffHead . '|(?:@@[^\n]*))#',
103						'|\n(\+[^\n]*)|',
104						'|\n(\-[^\n]*)|'),
105					array($HEAD . '\1' . $CLOSE,
106						$ADD . '\1' . $CLOSE,
107						$DEL . '\1' . $CLOSE),
108					$aText);
109				return TRUE;
110			case 'c':	// context output
111				$sections = preg_split('|(\n\*{5,})|',
112					preg_replace($DiffHead . ')#',
113						$HEAD . '\1' . $CLOSE,
114						$aText),
116				$sections[0] = preg_replace(
117					array('|\n(\-{3}[^\n]*)|',
118						'|\n(\*{3}[^\n]*)|'),
119					array($ADD . '\1' . $CLOSE,
120						$DEL . '\1' . $CLOSE),
121					$sections[0]);
122				$c = count($sections);
123				for ($i = 1; $c > $i; ++$i) {
124					$hits = array();
125					if (preg_match('|^\n(\*{5,})|',
126						$sections[$i], $hits)) {
127						unset($hits[0]);
128						$sections[$i] = $HEAD . $hits[1] . $CLOSE;
129					} else if (preg_match('|^\n(\x2A{3}\s[^\n]*)(.*)|s',
130						$sections[$i], $hits)) {
131						unset($hits[0]);	// free mem
132						$parts = preg_split('|\n(\-{3}\s[^\n]*)|',
133							$hits[2], -1, PREG_SPLIT_DELIM_CAPTURE);
134						// $parts[0] == OLD code
135						$parts[0] = preg_replace('|\n([!\-][^\n]*)|',
136							$DEL . '\1' . $CLOSE, $parts[0]);
137						// $parts[1] == head of NEW code
138						$parts[1] = $ADD . $parts[1] . $CLOSE;
139						// $parts[2] == NEW code
140						$parts[2] = preg_replace(
141							array('|\n([!\x2B][^\n]*)|',
142								'|\n(\x2A{3}[^\n]*)|'),
143							array($ADD . '\1' . $CLOSE,
144								$DEL . '\1' . $CLOSE),
145							$parts[2]);
146						if (isset($parts[3])) {
147							// TRUE when handling multi-file patches
148							$parts[3] = preg_replace('|^(\x2D{3}[^\n]*)|',
149								$ADD . '\1' . $CLOSE, $parts[3]);
150						} // if
151						$sections[$i] = $DEL . $hits[1] . $CLOSE
152							. implode('', $parts);
153					} // if
154					// ELSE: leave $sections[$i] as is
155				} // for
156				$aDoc .= implode('', $sections);
157				return TRUE;
158			case 'n':	// RCS output
159				// Only added lines are there so we highlight just the
160				// diff indicators while leaving the text alone.
161				$aDoc .= preg_replace(
162					array($DiffHead . ')#',
163						'|\n(d[0-9]+\s+[0-9]+)|',
164						'|\n(a[0-9]+\s+[0-9]+)|'),
165					array($HEAD . '\1' . $CLOSE,
166						$DEL . '\1' . $CLOSE,
167						$ADD . '\1' . $CLOSE),
168					$aText);
169				return TRUE;
170			case 's':	// simple output
171				$aDoc .= preg_replace(
172					array($DiffHead
173						. '|((?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*)))#',
174						'|\n(\x26#60;[^\n]*)|',
175						'|\n(\x26#62;[^\n]*)|'),
176					array($HEAD . '\1' . $CLOSE,
177						$DEL . '\1' . $CLOSE,
178						$ADD . '\1' . $CLOSE),
179					$aText);
180				return TRUE;
181			default:	// unknown diff format
182				$aDoc .= $aText;	// just append any unrecognized text
183				return TRUE;
184		} // switch
185	} // _addDiff()
187	/**
188	 * Add the lines of the given <tt>$aList</tt> to the specified
189	 * <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
190	 *
191	 * @param $aList Array [IN] the list of lines as prepared by
192	 * <tt>render()</tt>, [OUT] <tt>FALSE</tt>.
193	 * @param $aStart Integer The first linenumber to use.
194	 * @param $aDoc String Reference to the current renderer's
195	 * <tt>doc</tt> property.
196	 * @private
197	 * @see render()
198	 */
199	function _addLines(&$aList, $aStart, &$aDoc) {
200		// Since we're dealing with monospaced fonts here the width of each
201		// character (space, NBSP, digit) is the same. Hence the length of
202		// a digits string gives us its width i.e. the number of digits.
203		$i = $aStart + count($aList);	// greatest line number
204		$g = strlen("$i");		// width of greatest number
205		while (list($i, $l) = each($aList)) {
206			unset($aList[$i]);	// free mem
207			$aDoc .= '<span class="lno">'
208				. $this->_lead[$g - strlen("$aStart")]
209				. "$aStart:</span>" . ((($l) && ('&nbsp;' != $l))
210					? " $l\n"
211					: "\n");
212			++$aStart;	// increment line number
213		} // while
214		$aList = FALSE;	// release memory
215	} // _addLines()
217	/**
218	 * Internal convenience method to replace HTML special characters.
219	 *
220	 * @param $aString String [IN] The text to handle;
221	 * [OUT] the modified text (i.e. the method's result).
222	 * @return String The string with HTML special chars replaced.
223	 * @private
224	 * @since created 05-Feb-2007
225	 */
226	function &_entities(&$aString) {
227		$aString = str_replace(array('&', '<', '>'),
228			array('&#38;', '&#60;', '&#62;'), $aString);
229		return $aString;
230	} // _entities()
232	/**
233	 * Try to fix some markup error of the GeSHi SHELL highlighting.
234	 *
235	 * <p>
	 * The GeSHi highlighting for type "sh" (i.e. "bash") is, well,
	 * seriously flawed (at least up to the version of 2007-07-01).
	 * Especially the handling of comments and embedded strings as well
	 * as keywords is plain wrong.
240	 * </p><p>
241	 * This internal helper method tries to solve some minor problems by
242	 * removing highlight markup embedded in comment markup.
	 * This is, however, by no means a final resolution: GeSHi obviously
	 * keeps a kind of internal state resulting in highlighting markup
	 * spanning (i.e. repeated on) several lines.
	 * Which - if that state is wrong - causes great damage not by
	 * corrupting the data but by confusing the reader with wrong markup.
	 * The easiest way to trigger such a line spanning confusion is to use
	 * solitary doublequotes or singlequotes (apostrophe) in a comment
	 * line ...
251	 * </p>
252	 * @param $aMarkup String [IN] The highlight markup as returned by GeSHi;
253	 * [OUT] <tt>FALSE</tt>.
254	 * @param $aDoc String Reference to the current renderer's
255	 * <tt>doc</tt> property.
256	 * @private
257	 * @since created 04-Aug-2007
258	 * @see render()
259	 */
260	function _fixGeSHi_Bash(&$aMarkup, &$aDoc) {
261		$hits = array();
262		if (defined('GESHI_VERSION')
263		&& preg_match('|(\d+)\.(\d+)\.(\d+)\.(\d+)|', GESHI_VERSION, $hits)
264		&& ($hits = sprintf('%02u%02u%02u%03u',
265			$hits[1] * 1, $hits[2] * 1, $hits[3] * 1, $hits[4] * 1))
266		&& ('010007020' < $hits)) {
267			// GeSHi v1.0.7.21 has the comments bug fixed
268			$aDoc .= $aMarkup;
269			$aMarkup = FALSE;	// release memory
270			return;
271		} // if
272		$lines = explode("\n", $aMarkup);
273		$aMarkup = FALSE;	// release memory
274		while (list($i, $l) = each($lines)) {
275			$hits = array();
276			// GeSHi "bash" module marks up comments with CSS class "re3":
277			if (preg_match('|^((.*)<span class="re3">)(.*)$|i', $l, $hits)) {
278				if ('#!/bin/' == substr($hits[3], 0, 7)) {
279					$lines[$i] = $hits[2] . strip_tags($hits[3]);
280				} else {
281					$lines[$i] = $hits[1] . strip_tags($hits[3]) . '</span>';
282				} // if
283			} else if (! preg_match('|^\s*<span|i', $l)) {
284				// If a line doesn't start with a highlighted keyword
285				// all tags are removed since they're most probably
286				// "leftovers" from the GeSHI string/comment bug.
287				$lines[$i] = strip_tags($l);
288			} // if
289		} // while
290		$aDoc .= implode("\n", $lines);
291	} // _fixGeSHi_Bash()
293	/**
294	 * Add markup to load JavaScript file with older DokuWiki versions.
295	 *
296	 * @param $aRenderer Object The renderer used.
297	 * @private
298	 * @since created 19-Feb-2007
299	 * @see render()
300	 */
301	function _fixJS(&$aRenderer) {
302		//XXX This test will break if the DokuWiki file gets renamed:
303		if (@file_exists(DOKU_INC . 'lib/exe/js.php')) {
304			// Assuming a fairly recent DokuWiki installation
305			// handling the plugin files on its own there's
306			// nothing to do here ...
307			return;
308		} // if
309		if ($this->_JSmarkup) {
310			// Markup already added (or not needed)
311			return;
312		} // if
313		$localdir = realpath(dirname(__FILE__)) . '/';
314		$webdir = DOKU_BASE . 'lib/plugins/code/';
315		$css = '';
316		if (file_exists($localdir . 'style.css')) {
317			ob_start();
318			@include($localdir . 'style.css');
319			// Remove whitespace from CSS and expand IMG paths:
320			if ($css = preg_replace(
321				array('|\s*/\x2A.*?\x2A/\s*|s', '|\s*([:;\{\},+!])\s*|',
322					'|(?:url\x28\s*)([^/])|', '|^\s*|', '|\s*$|'),
323				array(' ', '\1', 'url(' . $webdir . '\1'),
324				ob_get_contents())) {
325				$css = '<style type="text/css">' . $css . '</style>';
326			} // if
327			ob_end_clean();
328		} // if
329		$js = (file_exists($localdir . 'script.js'))
330			? '<script type="text/javascript" src="'
331				. $webdir . 'script.js"></script>'
332			: '';
333		if ($this->_JSmarkup = $css . $js) {
334			$aRenderer->doc = $this->_JSmarkup
335				. preg_replace('|\s*<p>\s*</p>\s*|', '', $aRenderer->doc);
336		//ELSE: Neither CSS nor JS files found.
337		} // if
338		// Set member field to skip tests with next call:
339		$this->_JSmarkup = TRUE;
340	} // _fixJS()
342	/**
343	 * RegEx callback to markup spaces in ODT mode.
344	 *
345	 * @param $aList Array A list of RegEx matches.
346	 * @private
347	 * @static
348	 * @since created 07-Jun-2008
349	 * @see render()
350	 */
351	function _preserveSpaces($aList) {
352		return ($len = strlen($aList[1]))
353			? '<text:s text:c="' . $len . '"/>'
354			: ' ';
355	} // _preserveSpaces()
357	/**
358	 * Add the lines of the given <tt>$aText</tt> to the specified
359	 * <tt>$aDoc</tt> beginning with the given <tt>$aStart</tt> linenumber.
360	 *
361	 * @param $aText String [IN] the text lines as prepared by
362	 * <tt>handle()</tt>, [OUT] <tt>FALSE</tt>.
363	 * @param $aStart Integer The first linenumber to use;
364	 * if <tt>0</tt> (zero) no linenumbers are used.
365	 * @param $aDoc String Reference to the current renderer's
366	 * <tt>doc</tt> property.
367	 * @param $aClass String The CSS class name for the <tt>PRE</tt> tag.
368	 * @param $addTags Boolean Used in "ODT" mode to suppress tagging
369	 * the line numbers.
370	 * @private
371	 * @since created 03-Feb-2007
372	 * @see render()
373	 */
374	function _rawMarkup(&$aText, $aStart, &$aDoc, $aClass, $addTags = TRUE) {
375		if ($addTags) {
376			$aDoc .= '<pre class="' . $aClass . '">' . "\n";
377		} // if
378		if ($aStart) {
379			// Split the prepared data into a list of lines:
380			$aText = explode("\n", $aText);
381			// Add the numbered lines to the document:
382			$this->_addLines($aText, $aStart, $aDoc);
383		} else {
384			$aDoc .= $aText;
385		} // if
386		if ($addTags) {
387			$aDoc .= '</pre>';
388		} // if
389		$aText = FALSE;	// release memory
390	} // _rawMarkup()
392	/**
393	 * RegEx callback to replace SPAN tags in ODT mode.
394	 *
395	 * @param $aList Array A list of RegEx matches.
396	 * @private
397	 * @static
398	 * @since created 07-Jun-2008
399	 * @see render()
400	 */
401	function _replaceSpan($aList) {
402		return ($aList[3])
403			? '<text:span text:style-name="Code_5f_'
404				. str_replace('_', '_5f_', $aList[3]) . '">'
405			: '<text:span>';
406	} // _replaceSpan()
408	//@}
409	/**
410	 * @publicsection
411	 */
412	//@{
414	/**
415	 * Tell the parser whether the plugin accepts syntax mode
416	 * <tt>$aMode</tt> within its own markup.
417	 *
418	 * @param $aMode String The requested syntaxmode.
419	 * @return Boolean <tt>FALSE</tt> (no nested markup allowed).
420	 * @public
421	 * @see getAllowedTypes()
422	 */
423	function accepts($aMode) {
424		return FALSE;
425	} // accepts()
427	/**
428	 * Connect lookup pattern to lexer.
429	 *
430	 * @param $aMode String The desired rendermode.
431	 * @public
432	 * @see render()
433	 */
434	function connectTo($aMode) {
435		// look-ahead to minimize the chance of false matches:
436		$this->Lexer->addEntryPattern(
437			'\x3Ccode(?=[^>]*\x3E\r?\n.*\n\x3C\x2Fcode\x3E)',
438			$aMode, 'plugin_code');
439	} // connectTo()
441	/**
442	 * Get an array of mode types that may be nested within the
443	 * plugin's own markup.
444	 *
445	 * @return Array Allowed nested types (none).
446	 * @public
447	 * @see accepts()
448	 * @static
449	 */
450	function getAllowedTypes() {
451		return array();
452	} // getAllowedTypes()
454	/**
455	 * Get an associative array with plugin info.
456	 *
457	 * <p>
458	 * The returned array holds the following fields:
459	 * <dl>
460	 * <dt>author</dt><dd>Author of the plugin</dd>
461	 * <dt>email</dt><dd>Email address to contact the author</dd>
462	 * <dt>date</dt><dd>Last modified date of the plugin in
463	 * <tt>YYYY-MM-DD</tt> format</dd>
464	 * <dt>name</dt><dd>Name of the plugin</dd>
465	 * <dt>desc</dt><dd>Short description of the plugin (Text only)</dd>
466	 * <dt>url</dt><dd>Website with more information on the plugin
467	 * (eg. syntax description)</dd>
468	 * </dl>
469	 * @return Array Information about this plugin class.
470	 * @public
471	 * @static
472	 */
473	function getInfo() {
474		$c = 'code';	// hack to hide "desc" field from GeShi
475		return array(
476			'author' =>	'Matthias Watermann',
477			'email' =>	'support@mwat.de',
478			'date' =>	'2008-07-22',
479			'name' =>	'Code Syntax Plugin',
480			'desc' =>	'Syntax highlighting with line numbering <'
481				. $c . ' lang 1 |[fh] text |[hs]> ... </' . $c . '>',
482			'url' =>	'http://wiki.splitbrain.org/plugin:code2');
483	} // getInfo()
485	/**
486	 * Define how this plugin is handled regarding paragraphs.
487	 *
488	 * <p>
489	 * This method is important for correct XHTML nesting.
490	 * It returns one of the following values:
491	 * </p><dl>
492	 * <dt>normal</dt><dd>The plugin can be used inside paragraphs.</dd>
493	 * <dt>block</dt><dd>Open paragraphs need to be closed before
494	 * plugin output.</dd>
495	 * <dt>stack</dt><dd>Special case: Plugin wraps other paragraphs.</dd>
496	 * </dl>
497	 * @return String <tt>"block"</tt>.
498	 * @public
499	 * @static
500	 */
501	function getPType() {
502		return 'block';
503	} // getPType()
505	/**
506	 * Where to sort in?
507	 *
508	 * @return Integer <tt>194</tt> (below "Doku_Parser_Mode_code").
509	 * @public
510	 * @static
511	 */
512	function getSort() {
513		// class "Doku_Parser_Mode_code" returns 200
514		return 194;
515	} // getSort()
517	/**
518	 * Get the type of syntax this plugin defines.
519	 *
520	 * @return String <tt>"protected"</tt>.
521	 * @public
522	 * @static
523	 */
524	function getType() {
525		return 'protected';
526	} // getType()
528	/**
529	 * Handler to prepare matched data for the rendering process.
530	 *
531	 * <p>
532	 * The <tt>$aState</tt> parameter gives the type of pattern
533	 * which triggered the call to this method:
534	 * </p><dl>
535	 * <dt>DOKU_LEXER_UNMATCHED</dt>
536	 * <dd>ordinary text encountered within the plugin's syntax mode
537	 * which doesn't match any pattern.</dd>
538	 * </dl>
539	 * @param $aMatch String The text matched by the patterns.
540	 * @param $aState Integer The lexer state for the match.
541	 * @param $aPos Integer The character position of the matched text.
542	 * @param $aHandler Object Reference to the Doku_Handler object.
543	 * @return Array Index <tt>[0]</tt> holds the current <tt>$aState</tt>,
544	 * index <tt>[1]</tt> the embedded text to highlight,
545	 * index <tt>[2]</tt> the language/dialect (or <tt>FALSE</tt>),
546	 * index <tt>[3]</tt> the first line number (or <tt>0</tt>),
547	 * index <tt>[4]</tt> the top title (or <tt>FALSE</tt>),
548	 * index <tt>[5]</tt> the bottom title (or <tt>FALSE</tt>),
	 * index <tt>[6]</tt> hiding CSS flag (or <tt>""</tt>).
550	 * @public
551	 * @see render()
552	 * @static
553	 */
554	function handle($aMatch, $aState, $aPos, &$aHandler) {
555		if (DOKU_LEXER_UNMATCHED != $aState) {
556			return array($aState);	// nothing to do for "render()"
557		} // if
558		$aMatch = explode('>', $aMatch, 2);
559		// $aMatch[0] : lang etc.
560		// $aMatch[1] : text to highlight
561		$n = explode('>', trim($aMatch[1]));
562		$l = 'extern';		// external resource requested?
563		// Check whether there's an external file to fetch:
564		if ($l == $n[0]) {
565			if ($n[1] = trim($n[1])) {
566				if (is_array($n[0] = @parse_url($n[1]))
567				&& ($n[0] = $n[0]['scheme'])) {
568					// Don't accept unsecure schemes like
569					// "file", "javascript", "mailto" etc.
570					switch ($n[0]) {
571						case 'ftp':
572						case 'http':
573						case 'https':
574							//XXX This might fail due to global PHP setup:
575							if ($handle = @fopen($n[1], 'rb')) {
576								$aMatch[1] = '';
577								while (! @feof($handle)) {
578									//XXX This might fail due to
579									// memory constraints:
580									$aMatch[1] .= @fread($handle, 0x8000);
581								} // while
582								@fclose($handle);
583							} else {
584								$aMatch = array($l,
585									'Failed to retrieve: ' . $n[1]);
586							} // if
587							break;
588						default:
589							$aMatch = array($l,
590								'Unsupported URL scheme: ' . $n[0]);
591							break;
592					} // switch
593				} else {
594					$aMatch = array($l, 'Invalid URL: ' . $n[1]);
595				} // if
596			} else {
597				$aMatch = array($l, 'Missing URL: ' . $aMatch[1]);
598			} // if
599		} // if
600		// Strip leading/trailing/EoL whitespace,
601		// replace TABs by four spaces, "&#160;" by NBSP:
602		$aMatch[1] = preg_replace(
603			array('#(?>\r\n)|\r#', '|^\n\n*|',
604				'|[\t ]+\n|', '|\s*\n$|'),
605			array("\n", '', "\n", ''),
606			str_replace('&#160;', '&nbsp;',
607				str_replace("\t", '    ', $aMatch[1])));
609		$css = '';		// default: no initial CSS content hidding
610		$l = FALSE;		// default: no language
611		$n = 0;			// default: no line numbers
612		$ht = $ft = FALSE;	// default: no (head/foot) title
613		$hits = array();	// RegEx matches from the tag attributes
614		/*
615			The free form of the RegEx to parse the arguments here is:
616		/^
617			# "eat" leading whitespace:
618			\s*
619			(?=\S)	# Look ahead: do not match empty lines. This is
620					# needed since all other expressions are optional.
621			# Make sure, nothing is given away once it matched:
622			(?>
623				# We need a separate branch for "diff" because it may be
624				# followed by a _letter_ (not digit) indicating the format.
625				(?>
626					(diff)
627					#	match 1
628					(?>\s+([cnrsu]?))?
629					#	match 2
630				)
631			|
632				# Branch for standard language highlighting
633				(?>
634					# extract language:
635					([a-z][^\x7C\s]*)
636					#	match 3
637					(?>
638						# extract starting line number:
639						\s+(\d\d*)
640						#	match 4
641					)?
642				)
643			|
644				# Branch for line numbering only
645				(\d\d*)
646				#	match 5
647			|
648				\s*		# dummy needed to match "title only" markup (below)
649			)
650			# "eat" anything else up to the text delimiter:
651			[^\x7C]*
652			(?>
653				\x7C
654				# extract the position flag:
655				([bfht])?\s*
656				#	match 6
657				# extract the header,footer line:
658				([^\x7C]+)
659				#	match 7
660				(?>
661					# see whether there is a class flag:
662					\x7C\s*
663					(h|s)?.*
664					#	match 8
665				)?
666			)?
667		# Anchored to make sure everything gets matched:
668		$/xiu
670			Since compiling and applying a free form RegEx slows down the
671			overall matching process I've folded it all to a standard RegEx.
672			Benchmarking during development gave me
673			free form:	20480 loops, 552960 hits, 102400 fails, 12.994689 secs
674			standard:	20480 loops, 552960 hits, 102400 fails, 8.357169 secs
675		*/
676		if (preg_match('/^\s*(?=\S)(?>(?>(diff)(?>\s+([cnrsu]?))?)|'
677			. '(?>([a-z][^\x7C\s]*)(?>\s+(\d\d*))?)|(\d\d*)|\s*)[^\x7C]*'
678			. '(?>\x7C([bfht])?\s*([^\x7C]+)(?>\x7C\s*(h|s)?.*)?)?$/iu',
679		$aMatch[0], $hits)) {
680			unset($hits[0]);	// free mem
681			// $hits[1] = "diff"
682			// $hits[2] = type	(of [1])
683			// $hits[3] = LANG
684			// $hits[4] = NUM	(of [3])
685			// $hits[5] = NUM	(alone)
686			// $hits[6] = Top/Bottom flag	(of [7])
687			// $hits[7] = TITLE
688			// $hits[8] = s/h CSS flag
689			if (isset($hits[3]) && ($hits[3])) {
690				$l = strtolower($hits[3]);
691				if (isset($hits[4]) && ($hits[4])) {
692					$n = (int)$hits[4];
693				} // if
694				$hits[3] = $hits[4] = FALSE;
695			} else if (isset($hits[1]) && ($hits[1])) {
696				$l = strtolower($hits[1]);
697				$hits[2] = (isset($hits[2]))
698					? strtolower($hits[2]) . '?'
699					: '?';
700				$n = $hits[2]{0};
701				$hits[1] = $hits[2] = FALSE;
702			} else if (isset($hits[5]) && ($hits[5])) {
703				$n = (int)$hits[5];
704			} // if
705			if (isset($hits[7]) && ($hits[7])) {
706				$hits[6] = (isset($hits[6]))
707					? strtolower($hits[6]) . 'f'
708					: 'f';
709				switch ($hits[6]{0}) {
710					case 'h':
711					case 't':
712						$ht = trim($hits[7]);
713						break;
714					default:
715						$ft = trim($hits[7]);
716						break;
717				} // switch
718				if (isset($hits[8])) {
719					$hits[8] = strtolower($hits[8]) . 's';
720					if ('h' == $hits[8]{0}) {
721						// This class is handled by JavaScript (there
722						// _must_not_ be any CSS rules for this):
723						$css = ' HideOnInit';
724					} // if
725				} // if
726				$hits[6] = $hits[7] = $hits[8] = FALSE;
727			} // if
728		// ELSE: no arguments given to CODE tag
729		} // if
730		switch ($l) {
731			case 'console':
732				// nothing additional to setup here
733				break;
734			case 'diff':
735				if ("\n" != $aMatch[1]{0}) {
736					// A leading LF is needed to recognize and handle
737					// the very first line with all the REs used.
738					$aMatch[1] = "\n" . $aMatch[1];
739				} // if
740				switch ($n) {
741					case 'u':	// DIFF cmdline switch for "unified"
742					case 'c':	// DIFF cmdline switch for "context"
743					case 'n':	// DIFF cmdline switch for "RCS"
744					case 's':
745						// We believe the format hint ...
746						// (or should we be more suspicious?)
747						break;
748					case 'r':	// Mnemonic for "RCS"
749						$n = 'n';
750						break;
751					default:	// try to figure out the format actually used
752						if (preg_match(
753							'|\n(?:\x2A{5,}\n\x2A{3}\s[1-9]+.*?\x2A{4}\n.+?)+|s',
754							$aMatch[1])) {
755							$n = 'c';
756						} else if (preg_match(
757							'|\n@@\s\-[0-9]+,[0-9]+[ \+,0-9]+?@@\n.+\n|s',
758							$aMatch[1])) {
759							$n = 'u';
760						} else if (preg_match(
761							'|\n[ad][0-9]+\s+[0-9]+\r?\n|', $aMatch[1])) {
762							// We've to check this _before_ "simple" since
763							// the REs are quite similar (but this one is
764							// slightly more specific).
765							$n = 'n';
766						} else if (preg_match(
767							'|\n(?:[0-9a-z]+(?:,[0-9a-z]+)*)(?:[^\n]*\n.*?)+|',
768							$aMatch[1])) {
769							$n = 's';
770						} else {
771							$n = '?';
772						} // if
773						break;
774				} // switch
775				break;
776			case 'htm':		// convenience shortcut
777			case 'html':	// dito
778				$l = 'html4strict';
779				break;
780			case 'js':		// shortcut
781				$l = 'javascript';
782				break;
783			case 'sh':		// shortcut
784				$l = 'bash';
785				break;
786			default:
787				if (! $l) {
788					// no language: simple PRE markup will get generated
789					$l = FALSE;
790				} // if
791				break;
792		} // switch
793		return array(DOKU_LEXER_UNMATCHED,
794			$aMatch[1], $l, $n, $ht, $ft, $css);
795	} // handle()
797	/**
798	 * Add exit pattern to lexer.
799	 *
800	 * @public
801	 */
802	function postConnect() {
803		// look-before to minimize the chance of false matches:
804		$this->Lexer->addExitPattern('(?<=\n)\x3C\x2Fcode\x3E',
805			'plugin_code');
806	} // postConnect()
808	/**
809	 * Handle the actual output (markup) creation.
810	 *
811	 * <p>
812	 * The method checks the given <tt>$aFormat</tt> to decide how to
813	 * handle the specified <tt>$aData</tt>.
	 * The standard case (i.e. <tt>"xhtml"</tt>) is handled completely
	 * by this implementation, preparing linenumbers and/or head/foot
	 * lines if requested.
817	 * For the <tt>"odt"</tt> format all plugin features (incl. linenumbers
818	 * and header/footer lines) are supported by generating the appropriate
819	 * ODT/XML markup.
820	 * All other formats are passed back to the given <tt>$aRenderer</tt>
821	 * instance for further handling.
822	 * </p><p>
823	 * <tt>$aRenderer</tt> contains a reference to the renderer object
824	 * which is currently in charge of the rendering.
825	 * The contents of the given <tt>$aData</tt> is the return value
826	 * of the <tt>handle()</tt> method.
827	 * </p>
828	 * @param $aFormat String The output format to generate.
829	 * @param $aRenderer Object A reference to the renderer object.
830	 * @param $aData Array The data created/returned by the
831	 * <tt>handle()</tt> method.
832	 * @return Boolean <tt>TRUE</tt>.
833	 * @public
834	 * @see handle()
835	 */
836	function render($aFormat, &$aRenderer, &$aData) {
837		if (DOKU_LEXER_UNMATCHED != $aData[0]) {
838			return TRUE;
839		} // if
840		if ('xhtml' == $aFormat) {
841			if ($tdiv = (($aData[4]) || ($aData[5]))) {
842				$this->_fixJS($aRenderer);	// check for old DokuWiki versions
843				$aRenderer->doc .= '<div class="code">';
844				if ($aData[4]) {
845					//XXX Note that "_headerToLink()" is supposed to be a
846					// _private_ method of the renderer class; so this code
847					// will fail once DokuWiki is rewritten in PHP5 which
848					// implements encapsulation of private methods and
849					// properties:
850					$aRenderer->doc .= '<p class="codehead' . $aData[6]
851						. '"><a name="' . $aRenderer->_headerToLink($aData[4])
852						. '">' . $this->_entities($aData[4]) . '</a></p>';
853					$aData[4] = $aData[6] = FALSE;	// free mem
854				} // if
855			} // if
856			if ($aData[2]) {	// lang was given
857				if ('console' == $aData[2]) {
858					$this->_rawMarkup($this->_entities($aData[1]),
859						$aData[3], $aRenderer->doc, $aData[2]);
860				} else if ('diff' == $aData[2]) {
861					$this->_entities($aData[1]);
862					$aRenderer->doc .= '<pre class="code diff">';
863					$this->_addDiff($aData[1], $aData[3], $aRenderer->doc);
864					$aRenderer->doc .= '</pre>';
865				} else {
866					$isSH = ('bash' == $aData[2]);
867					$geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
868					if ($geshi->error()) {
869						// Language not supported by "GeSHi"
870						$geshi = NULL;	// release memory
871						$this->_rawMarkup($this->_entities($aData[1]),
872							$aData[3], $aRenderer->doc, 'code');
873					} else {
874						$aData[1] = FALSE;	// free mem
875						$geshi->enable_classes();
876						$geshi->set_encoding('utf-8');
877						$geshi->set_header_type(GESHI_HEADER_PRE);
878						$geshi->set_overall_class('code ' . $aData[2]);
879						global $conf;
880						if ($conf['target']['extern']) {
881							$geshi->set_link_target($conf['target']['extern']);
882						} // if
883						if ($aData[3]) {		// line numbers requested
884							// Separate PRE tag from parsed data:
885							$aData[1] = explode('>', $geshi->parse_code(), 2);
886							// [1][0] =	leading "<pre"
887							// [1][1] =	remaining markup up to trailing "</pre"
888							$geshi = NULL;	// release memory
890							// Add the open tag to the document:
891							$aRenderer->doc .= $aData[1][0] . '>';
893							// Separate trailing PRE tag:
894							$aData[1] = explode('</pre>', $aData[1][1], 2);
895							// [1][0] =	GeSHi markup
896							// [1][1] =	trailing "</pre"
898							if ($isSH) {
899								$aData[1][1] = '';
900								$this->_fixGeSHi_Bash($aData[1][0],
901									$aData[1][1]);
902							} else {
903								// Set reference to fixed markup to sync with
904								// the "bash" execution path (above):
905								$aData[1][1] =& $aData[1][0];
906							} // if
908							// Split the parsed data into a list of lines:
909							$aData[2] = explode("\n", $aData[1][1]);
910							$aData[1] = FALSE; // free mem
912							// Add the numbered lines to the document:
913							$this->_addLines($aData[2], $aData[3],
914								$aRenderer->doc);
916							// Close the preformatted section markup:
917							$aRenderer->doc .= '</pre>';
918						} else {				// w/o line numbering
919							if ($isSH) {
920								// Separate trailing PRE tag which
921								// sometimes is "forgotten" by GeSHi:
922								$aData[2] = explode('</pre>',
923									$geshi->parse_code(), 2);
924								// [1][0] =	GeSHi markup
925								// [1][1] =	trailing "</pre" (if any)
926								$this->_fixGeSHi_Bash($aData[2][0],
927									$aRenderer->doc);
928								$aRenderer->doc .= '</pre>';
929							} else {
930								$aRenderer->doc .= $geshi->parse_code();
931							} // if
932							$geshi = NULL;	// release memory
933						} // if
934					} // if
935				} // if
936			} else {
937				$this->_rawMarkup($this->_entities($aData[1]),
938					$aData[3], $aRenderer->doc, 'code');
939			} // if
940			if ($tdiv) {
941				if ($aData[5]) {
942					//XXX See "_headerToLink()" note above.
943					$aRenderer->doc .= '<p class="codefoot'
944						. $aData[6] . '"><a name="'
945						. $aRenderer->_headerToLink($aData[5]) . '">'
946						. $this->_entities($aData[5]) . '</a></p>';
947				} // if
948				$aRenderer->doc .= '</div>';
949			} // if
950		} else if ('odt' == $aFormat) {
951			$inLI = array();
952			if (preg_match('|^<text:p text:style-name="[^"]+">\s*</text:p>\s*(.*)$|si',
953				$aRenderer->doc, $inLI)) {
954				// remove leading whitespace
955				$aRenderer->doc = $inLI[1];
956			} // if
957			// The "renderer_plugin_odt" doesn't clean (close)
958			// its own tags before calling this plugin.
959			// To work around that bug we have to check some
960			// private properties of the renderer instance.
961			$inLI = FALSE;
962			if (is_a($aRenderer, 'renderer_plugin_odt')) {
963				if ($inLI = ($aRenderer->in_list_item)) {
964					// If we're in a list item, we have to close the paragraph:
965					$aRenderer->doc .= '</text:p>';
966				} // if
967				if ($aRenderer->in_paragraph) {
968					$aRenderer->doc .= '</text:p>';
969					$aRenderer->in_paragraph = FALSE;
970				} // if
971			} // if
973			// Init (open) our text section:
974			$aRenderer->doc .= "\n"
975				. '<text:section text:style-name="Code_5f_Section" text:name="CodeSnippet'
976				. ++$this->_odtSect . '">';
978			if ($tdiv = (($aData[4]) || ($aData[5]))) {
979				// Check whether we need a top caption ("header"):
980				if ($aData[4]) {
981					$aRenderer->doc .=
982						'<text:p text:style-name="Code_5f_Title">'
983						. "<text:line-break/>\n"
984						. $aData[4] . "</text:p>\n";
985					$aData[4] = $aData[6] = FALSE;	// free mem
986				} // if
987			} // if
988			// The following code resembles the "xhtml" processing
989			// above except that we're not using "pre" tags here
990			// but ODT/XML markup.
991			$aData[0] = '';		// tmp. container of processed data
992			if ($aData[2]) {	// lang was given
993				if ('console' == $aData[2]) {
994					$this->_rawMarkup($this->_entities($aData[1]),
995						$aData[3], $aData[0], $aData[2], FALSE);
996				} else if ('diff' == $aData[2]) {
997					$this->_addDiff($this->_entities($aData[1]),
998						$aData[3], $aData[0]);
999				} else {
1000					$isSH = ('bash' == $aData[2]);
1001					$geshi = new GeSHi($aData[1], $aData[2], GESHI_LANG_ROOT);
1002					if ($geshi->error()) {
1003						// Language not supported by "GeSHi"
1004						$geshi = NULL;	// release memory
1005						$this->_rawMarkup($this->_entities($aData[1]),
1006							$aData[3], $aData[0], '', FALSE);
1007					} else {
1008						$aData[1] = FALSE;	// free mem
1009						$geshi->enable_classes();
1010						$geshi->set_encoding('utf-8');
1011						$geshi->set_header_type(GESHI_HEADER_PRE);
1012						$geshi->set_overall_class('code ' . $aData[2]);
1013						global $conf;
1014						if ($conf['target']['extern']) {
1015							$geshi->set_link_target($conf['target']['extern']);
1016						} // if
1017						// Separate PRE tag from parsed data:
1018						$aData[1] = explode('>', $geshi->parse_code(), 2);
1019						// [1][0] =	leading "<pre"
1020						// [1][1] =	remaining markup up to trailing "</pre"
1021						$geshi = NULL;	// release memory
1023						// Separate trailing PRE tag:
1024						$aData[1] = explode('</pre>', $aData[1][1], 2);
1025						// [1][0] =	GeSHi markup
1026						// [1][1] =	trailing "</pre"
1027						$aData[1] = $aData[1][0];
1029						if ($isSH) {	// work around GeSHI bug
1030							$aData[2] = '';
1031							$this->_fixGeSHi_Bash($aData[1], $aData[2]);
1032						} else {
1033							$aData[2] = $aData[1];
1034						} // if
1035						$aData[1] = FALSE; // release memory
1037						if ($aData[3]) {		// line numbers requested
1038							// Split the parsed data into a list of lines:
1039							$aData[1] = explode("\n", $aData[2]);
1040							$aData[2] = FALSE; // release memory
1042							// Add the numbered lines to the document:
1043							$this->_addLines($aData[1], $aData[3], $aData[0]);
1044						} else {		// w/o line numbers
1045							$aData[0] = $aData[2];
1046							$aData[2] = FALSE; // release memory
1047						} // if
1048					} // if
1049				} // if
1050			} else {
1051				$this->_rawMarkup($this->_entities($aData[1]),
1052					$aData[3], $aData[0], '', FALSE);
1053			} // if
1055			if ('console' == $aData[2]) {
1056				$aRenderer->doc .=
1057					'<text:p text:style-name="Code_5f_Console">';
1058			} else {
1059				$aRenderer->doc .=
1060					'<text:p text:style-name="Code_5f_Standard">';
1061			} // if
1062			// Replace the HTML "span" tags (for highlighting) by
1063			// the appropriate ODT/XML markup.
1064			// For unknown reasons we need an additional space
1065			// in front of the very first line.
1066			$aData[0] = '<text:s/>'
1067				. preg_replace_callback('|(<span( class="([^"]*)"[^>]*)?>)|',
1068					array('syntax_plugin_code', '_replaceSpan'),
1069					// OOo (v2.3) crashes on "&nbsp;"
1070					str_replace('&nbsp;', chr(194) . chr(160),
1071						str_replace('</span>', '</text:span>',
1072							strip_tags($aData[0], '<span>'))));
1073			// Now append our markup to the renderer's document;
1074			// TABs, LFs and SPACEs are replaced by their respective
1075			// ODT/XML equivalents:
1076			$aRenderer->doc .= preg_replace_callback('|( {2,})|',
1077				array('syntax_plugin_code', '_preserveSpaces'),
1078				str_replace("\n", "<text:line-break/>\n", $aData[0]));
1079			$aData[0] =	FALSE;	// release memory
1081			// Check whether we need a bottom caption ("footer"):
1082			if ($tdiv && ($aData[5])) {
1083				$aRenderer->doc .=
1084					'</text:p><text:p text:style-name="Code_5f_Title">'
1085					. $aData[5];
1086			} // if
1087			// Close all our open tags:
1088			$aRenderer->doc .=  "</text:p></text:section>\n";
1090			if ($inLI) {
1091				// Workaround (see above): (re-)open a paragraph:
1092				$aRenderer->doc .= '<text:p>';
1093			} // if
1094		} else {		// unsupported output format
1095			$aData[0] = $aData[4] = $aData[5] = FALSE;	// avoid recursion
1096			// Pass anything else back to the renderer instance
1097			// (which will - hopefully - know how to handle it):
1098			$aRenderer->code($aData[1], $aData[2]);
1099		} // if
1100		$aData = array(FALSE);	// don't process this text again
1101		return TRUE;
1102	} // render()
1104	//@}
1105} // class syntax_plugin_code
1106} // if