1<?php
2#
3# Markdown Extra  -  A text-to-HTML conversion tool for web writers
4#
5# PHP Markdown & Extra
6# Copyright (c) 2004-2013 Michel Fortin
7# <http://michelf.ca/projects/php-markdown/>
8#
9# Original Markdown
10# Copyright (c) 2004-2006 John Gruber
11# <http://daringfireball.net/projects/markdown/>
12#
13
14
define( 'MARKDOWN_VERSION',  "1.0.2" ); # 29 Nov 2013
define( 'MARKDOWNEXTRA_VERSION',  "1.2.8" ); # 29 Nov 2013


#
# Global default settings:
#
# Each setting can be overridden by defining the constant before this file
# is included. The `defined()` guard keeps the earlier value, so there is no
# redefinition error to silence with `@`.
#

# Change to ">" for HTML output
defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX') or define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");

# Define the width of a tab for code blocks.
defined('MARKDOWN_TAB_WIDTH') or define( 'MARKDOWN_TAB_WIDTH',     4 );

# Optional title attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_TITLE') or define( 'MARKDOWN_FN_LINK_TITLE',         "" );
defined('MARKDOWN_FN_BACKLINK_TITLE') or define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );

# Optional class attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_CLASS') or define( 'MARKDOWN_FN_LINK_CLASS',         "" );
defined('MARKDOWN_FN_BACKLINK_CLASS') or define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );

# Optional class prefix for fenced code block.
defined('MARKDOWN_CODE_CLASS_PREFIX') or define( 'MARKDOWN_CODE_CLASS_PREFIX',     "" );

# Class attribute for code blocks goes on the `code` tag;
# setting this to true will put attributes on the `pre` tag instead.
defined('MARKDOWN_CODE_ATTR_ON_PRE') or define( 'MARKDOWN_CODE_ATTR_ON_PRE',   false );


#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
defined('MARKDOWN_WP_POSTS') or define( 'MARKDOWN_WP_POSTS',      true );
defined('MARKDOWN_WP_COMMENTS') or define( 'MARKDOWN_WP_COMMENTS',   true );



### Standard Function Interface ###

# Name of the parser class instantiated by Markdown() and mdwp_MarkdownPost().
defined('MARKDOWN_PARSER_CLASS') or define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
58
function Markdown($text) {
#
# Convert $text with an instance of the class named by MARKDOWN_PARSER_CLASS
# and return the result of its transform method. The instance is created on
# the first call and reused by every later call.
#
	static $parser = null;

	if ($parser === null) {
		$class_name = MARKDOWN_PARSER_CLASS;
		$parser = new $class_name;
	}

	return $parser->transform($text);
}
73
74
75### WordPress Plugin Interface ###
76
77/*
78Plugin Name: Markdown Extra
79Plugin Name: Markdown
80Plugin URI: http://michelf.ca/projects/php-markdown/
81Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.ca/projects/php-markdown/">More...</a>
82Version: 1.2.8
83Author: Michel Fortin
84Author URI: http://michelf.ca/
85*/
86
87if (isset($wp_version)) {
88	# More details about how it works here:
89	# <http://michelf.ca/weblog/2005/wordpress-text-flow-vs-markdown/>
90
91	# Post content and excerpts
92	# - Remove WordPress paragraph generator.
93	# - Run Markdown on excerpt, then remove all tags.
94	# - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
95	if (MARKDOWN_WP_POSTS) {
96		remove_filter('the_content',     'wpautop');
97        remove_filter('the_content_rss', 'wpautop');
98		remove_filter('the_excerpt',     'wpautop');
99		add_filter('the_content',     'mdwp_MarkdownPost', 6);
100        add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
101		add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
102		add_filter('get_the_excerpt', 'trim', 7);
103		add_filter('the_excerpt',     'mdwp_add_p');
104		add_filter('the_excerpt_rss', 'mdwp_strip_p');
105
106		remove_filter('content_save_pre',  'balanceTags', 50);
107		remove_filter('excerpt_save_pre',  'balanceTags', 50);
108		add_filter('the_content',  	  'balanceTags', 50);
109		add_filter('get_the_excerpt', 'balanceTags', 9);
110	}
111
112	# Add a footnote id prefix to posts when inside a loop.
113	function mdwp_MarkdownPost($text) {
114		static $parser;
115		if (!$parser) {
116			$parser_class = MARKDOWN_PARSER_CLASS;
117			$parser = new $parser_class;
118		}
119		if (is_single() || is_page() || is_feed()) {
120			$parser->fn_id_prefix = "";
121		} else {
122			$parser->fn_id_prefix = get_the_ID() . ".";
123		}
124		return $parser->transform($text);
125	}
126
127	# Comments
128	# - Remove WordPress paragraph generator.
129	# - Remove WordPress auto-link generator.
130	# - Scramble important tags before passing them to the kses filter.
131	# - Run Markdown on excerpt then remove paragraph tags.
132	if (MARKDOWN_WP_COMMENTS) {
133		remove_filter('comment_text', 'wpautop', 30);
134		remove_filter('comment_text', 'make_clickable');
135		add_filter('pre_comment_content', 'Markdown', 6);
136		add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
137		add_filter('pre_comment_content', 'mdwp_show_tags', 12);
138		add_filter('get_comment_text',    'Markdown', 6);
139		add_filter('get_comment_excerpt', 'Markdown', 6);
140		add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
141
142		global $mdwp_hidden_tags, $mdwp_placeholders;
143		$mdwp_hidden_tags = explode(' ',
144			'<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
145		$mdwp_placeholders = explode(' ', str_rot13(
146			'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
147			'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
148	}
149
150	function mdwp_add_p($text) {
151		if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
152			$text = '<p>'.$text.'</p>';
153			$text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
154		}
155		return $text;
156	}
157
158	function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
159
160	function mdwp_hide_tags($text) {
161		global $mdwp_hidden_tags, $mdwp_placeholders;
162		return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
163	}
164	function mdwp_show_tags($text) {
165		global $mdwp_hidden_tags, $mdwp_placeholders;
166		return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
167	}
168}
169
170
171### bBlog Plugin Info ###
172
function identify_modifier_markdown() {
#
# Return the descriptive array for this modifier: name, type, display name,
# description, authors, licence, version and help text.
#
	$info = array();
	$info['name']        = 'markdown';
	$info['type']        = 'modifier';
	$info['nicename']    = 'PHP Markdown Extra';
	$info['description'] = 'A text-to-HTML conversion tool for web writers';
	$info['authors']     = 'Michel Fortin and John Gruber';
	$info['licence']     = 'GPL';
	$info['version']     = MARKDOWNEXTRA_VERSION;
	$info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.ca/projects/php-markdown/">More...</a>';
	return $info;
}
185
186
187### Smarty Modifier Interface ###
188
function smarty_modifier_markdown($text) {
	# Thin wrapper: hands the text to Markdown() and returns its result.
	$html = Markdown($text);
	return $html;
}
192
193
194### Textile Compatibility Mode ###
195
196# Rename this file to "classTextile.php" and it can replace Textile everywhere.
197
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
	# Try to include PHP SmartyPants. Should be in the same directory.
	@include_once 'smartypants.php';

	# Stand-in for the Textile class whose methods call Markdown instead.
	class Textile {
		# Runs Markdown when neither $lite nor $encode is given, then
		# SmartyPants when that function is available.
		function TextileThis($text, $lite='', $encode='') {
			$plain_call = ($lite == '' && $encode == '');
			if ($plain_call) {
				$text = Markdown($text);
			}
			if (function_exists('SmartyPants')) {
				$text = SmartyPants($text);
			}
			return $text;
		}

		# Restrictions are not supported for now: same as TextileThis(),
		# with $noimage ignored.
		function TextileRestricted($text, $lite='', $noimage='') {
			$converted = $this->TextileThis($text, $lite);
			return $converted;
		}

		# Workaround to ensure compatibility with TextPattern 4.0.3:
		# returns the text unchanged.
		function blockLite($text) {
			return $text;
		}
	}
}
216
217
218
219#
220# Markdown Parser Class
221#
222
223class Markdown_Parser {
224
225	### Configuration Variables ###
226
227	# Change to ">" for HTML output.
228	var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
229	var $tab_width = MARKDOWN_TAB_WIDTH;
230
231	# Change to `true` to disallow markup or entities.
232	var $no_markup = false;
233	var $no_entities = false;
234
235	# Predefined urls and titles for reference links and images.
236	var $predef_urls = array();
237	var $predef_titles = array();
238
239
240	### Parser Implementation ###
241
242	# Regex to match balanced [brackets].
243	# Needed to insert a maximum bracked depth while converting to PHP.
244	var $nested_brackets_depth = 6;
245	var $nested_brackets_re;
246
247	var $nested_url_parenthesis_depth = 4;
248	var $nested_url_parenthesis_re;
249
250	# Table of hash values for escaped characters:
251	var $escape_chars = '\`*_{}[]()>#+-.!';
252	var $escape_chars_re;
253
254
255	function Markdown_Parser() {
256	#
257	# Constructor function. Initialize appropriate member variables.
258	#
259		$this->_initDetab();
260		$this->prepareItalicsAndBold();
261
262		$this->nested_brackets_re =
263			str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
264			str_repeat('\])*', $this->nested_brackets_depth);
265
266		$this->nested_url_parenthesis_re =
267			str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
268			str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
269
270		$this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
271
272		# Sort document, block, and span gamut in ascendent priority order.
273		asort($this->document_gamut);
274		asort($this->block_gamut);
275		asort($this->span_gamut);
276	}
277
278
279	# Internal hashes used during transformation.
280	var $urls = array();
281	var $titles = array();
282	var $html_hashes = array();
283
284	# Status flag to avoid invalid nesting.
285	var $in_anchor = false;
286
287
288	function setup() {
289	#
290	# Called before the transformation process starts to setup parser
291	# states.
292	#
293		# Clear global hashes.
294		$this->urls = $this->predef_urls;
295		$this->titles = $this->predef_titles;
296		$this->html_hashes = array();
297
298		$this->in_anchor = false;
299	}
300
301	function teardown() {
302	#
303	# Called after the transformation process to clear any variable
304	# which may be taking up memory unnecessarly.
305	#
306		$this->urls = array();
307		$this->titles = array();
308		$this->html_hashes = array();
309	}
310
311
312	function transform($text) {
313	#
314	# Main function. Performs some preprocessing on the input text
315	# and pass it through the document gamut.
316	#
317		$this->setup();
318
319		# Remove UTF-8 BOM and marker character in input, if present.
320		$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
321
322		# Standardize line endings:
323		#   DOS to Unix and Mac to Unix
324		$text = preg_replace('{\r\n?}', "\n", $text);
325
326		# Make sure $text ends with a couple of newlines:
327		$text .= "\n\n";
328
329		# Convert all tabs to spaces.
330		$text = $this->detab($text);
331
332		# Turn block-level HTML blocks into hash entries
333		$text = $this->hashHTMLBlocks($text);
334
335		# Strip any lines consisting only of spaces and tabs.
336		# This makes subsequent regexen easier to write, because we can
337		# match consecutive blank lines with /\n+/ instead of something
338		# contorted like /[ ]*\n+/ .
339		$text = preg_replace('/^[ ]+$/m', '', $text);
340
341		# Run document gamut methods.
342		foreach ($this->document_gamut as $method => $priority) {
343			$text = $this->$method($text);
344		}
345
346		$this->teardown();
347
348		return $text . "\n";
349	}
350
351	var $document_gamut = array(
352		# Strip link definitions, store in hashes.
353		"stripLinkDefinitions" => 20,
354
355		"runBasicBlockGamut"   => 30,
356		);
357
358
359	function stripLinkDefinitions($text) {
360	#
361	# Strips link definitions from text, stores the URLs and titles in
362	# hash references.
363	#
364		$less_than_tab = $this->tab_width - 1;
365
366		# Link defs are in the form: ^[id]: url "optional title"
367		$text = preg_replace_callback('{
368							^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:	# id = $1
369							  [ ]*
370							  \n?				# maybe *one* newline
371							  [ ]*
372							(?:
373							  <(.+?)>			# url = $2
374							|
375							  (\S+?)			# url = $3
376							)
377							  [ ]*
378							  \n?				# maybe one newline
379							  [ ]*
380							(?:
381								(?<=\s)			# lookbehind for whitespace
382								["(]
383								(.*?)			# title = $4
384								[")]
385								[ ]*
386							)?	# title is optional
387							(?:\n+|\Z)
388			}xm',
389			array(&$this, '_stripLinkDefinitions_callback'),
390			$text);
391		return $text;
392	}
393	function _stripLinkDefinitions_callback($matches) {
394		$link_id = strtolower($matches[1]);
395		$url = $matches[2] == '' ? $matches[3] : $matches[2];
396		$this->urls[$link_id] = $url;
397		$this->titles[$link_id] =& $matches[4];
398		return ''; # String that will replace the block
399	}
400
401
402	function hashHTMLBlocks($text) {
403		if ($this->no_markup)  return $text;
404
405		$less_than_tab = $this->tab_width - 1;
406
407		# Hashify HTML blocks:
408		# We only want to do this for block-level HTML tags, such as headers,
409		# lists, and tables. That's because we still want to wrap <p>s around
410		# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
411		# phrase emphasis, and spans. The list of tags we're looking for is
412		# hard-coded:
413		#
414		# *  List "a" is made of tags which can be both inline or block-level.
415		#    These will be treated block-level when the start tag is alone on
416		#    its line, otherwise they're not matched here and will be taken as
417		#    inline later.
418		# *  List "b" is made of tags which are always block-level;
419		#
420		$block_tags_a_re = 'ins|del';
421		$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
422						   'script|noscript|form|fieldset|iframe|math|svg|'.
423						   'article|section|nav|aside|hgroup|header|footer|'.
424						   'figure';
425
426		# Regular expression for the content of a block tag.
427		$nested_tags_level = 4;
428		$attr = '
429			(?>				# optional tag attributes
430			  \s			# starts with whitespace
431			  (?>
432				[^>"/]+		# text outside quotes
433			  |
434				/+(?!>)		# slash not followed by ">"
435			  |
436				"[^"]*"		# text inside double quotes (tolerate ">")
437			  |
438				\'[^\']*\'	# text inside single quotes (tolerate ">")
439			  )*
440			)?
441			';
442		$content =
443			str_repeat('
444				(?>
445				  [^<]+			# content without tag
446				|
447				  <\2			# nested opening tag
448					'.$attr.'	# attributes
449					(?>
450					  />
451					|
452					  >', $nested_tags_level).	# end of opening tag
453					  '.*?'.					# last level nested tag content
454			str_repeat('
455					  </\2\s*>	# closing nested tag
456					)
457				  |
458					<(?!/\2\s*>	# other tags with a different name
459				  )
460				)*',
461				$nested_tags_level);
462		$content2 = str_replace('\2', '\3', $content);
463
464		# First, look for nested blocks, e.g.:
465		# 	<div>
466		# 		<div>
467		# 		tags for inner block must be indented.
468		# 		</div>
469		# 	</div>
470		#
471		# The outermost tags must start at the left margin for this to match, and
472		# the inner nested divs must be indented.
473		# We need to do this before the next, more liberal match, because the next
474		# match will start at the first `<div>` and stop at the first `</div>`.
475		$text = preg_replace_callback('{(?>
476			(?>
477				(?<=\n\n)		# Starting after a blank line
478				|				# or
479				\A\n?			# the beginning of the doc
480			)
481			(						# save in $1
482
483			  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
484			  # in between.
485
486						[ ]{0,'.$less_than_tab.'}
487						<('.$block_tags_b_re.')# start tag = $2
488						'.$attr.'>			# attributes followed by > and \n
489						'.$content.'		# content, support nesting
490						</\2>				# the matching end tag
491						[ ]*				# trailing spaces/tabs
492						(?=\n+|\Z)	# followed by a newline or end of document
493
494			| # Special version for tags of group a.
495
496						[ ]{0,'.$less_than_tab.'}
497						<('.$block_tags_a_re.')# start tag = $3
498						'.$attr.'>[ ]*\n	# attributes followed by >
499						'.$content2.'		# content, support nesting
500						</\3>				# the matching end tag
501						[ ]*				# trailing spaces/tabs
502						(?=\n+|\Z)	# followed by a newline or end of document
503
504			| # Special case just for <hr />. It was easier to make a special
505			  # case than to make the other regex more complicated.
506
507						[ ]{0,'.$less_than_tab.'}
508						<(hr)				# start tag = $2
509						'.$attr.'			# attributes
510						/?>					# the matching end tag
511						[ ]*
512						(?=\n{2,}|\Z)		# followed by a blank line or end of document
513
514			| # Special case for standalone HTML comments:
515
516					[ ]{0,'.$less_than_tab.'}
517					(?s:
518						<!-- .*? -->
519					)
520					[ ]*
521					(?=\n{2,}|\Z)		# followed by a blank line or end of document
522
523			| # PHP and ASP-style processor instructions (<? and <%)
524
525					[ ]{0,'.$less_than_tab.'}
526					(?s:
527						<([?%])			# $2
528						.*?
529						\2>
530					)
531					[ ]*
532					(?=\n{2,}|\Z)		# followed by a blank line or end of document
533
534			)
535			)}Sxmi',
536			array(&$this, '_hashHTMLBlocks_callback'),
537			$text);
538
539		return $text;
540	}
541	function _hashHTMLBlocks_callback($matches) {
542		$text = $matches[1];
543		$key  = $this->hashBlock($text);
544		return "\n\n$key\n\n";
545	}
546
547
548	function hashPart($text, $boundary = 'X') {
549	#
550	# Called whenever a tag must be hashed when a function insert an atomic
551	# element in the text stream. Passing $text to through this function gives
552	# a unique text-token which will be reverted back when calling unhash.
553	#
554	# The $boundary argument specify what character should be used to surround
555	# the token. By convension, "B" is used for block elements that needs not
556	# to be wrapped into paragraph tags at the end, ":" is used for elements
557	# that are word separators and "X" is used in the general case.
558	#
559		# Swap back any tag hash found in $text so we do not have to `unhash`
560		# multiple times at the end.
561		$text = $this->unhash($text);
562
563		# Then hash the block.
564		static $i = 0;
565		$key = "$boundary\x1A" . ++$i . $boundary;
566		$this->html_hashes[$key] = $text;
567		return $key; # String that will replace the tag.
568	}
569
570
571	function hashBlock($text) {
572	#
573	# Shortcut function for hashPart with block-level boundaries.
574	#
575		return $this->hashPart($text, 'B');
576	}
577
578
579	var $block_gamut = array(
580	#
581	# These are all the transformations that form block-level
582	# tags like paragraphs, headers, and list items.
583	#
584		"doHeaders"         => 10,
585		"doHorizontalRules" => 20,
586
587		"doLists"           => 40,
588		"doCodeBlocks"      => 50,
589		"doBlockQuotes"     => 60,
590		);
591
592	function runBlockGamut($text) {
593	#
594	# Run block gamut tranformations.
595	#
596		# We need to escape raw HTML in Markdown source before doing anything
597		# else. This need to be done for each block, and not only at the
598		# begining in the Markdown function since hashed blocks can be part of
599		# list items and could have been indented. Indented blocks would have
600		# been seen as a code block in a previous pass of hashHTMLBlocks.
601		$text = $this->hashHTMLBlocks($text);
602
603		return $this->runBasicBlockGamut($text);
604	}
605
606	function runBasicBlockGamut($text) {
607	#
608	# Run block gamut tranformations, without hashing HTML blocks. This is
609	# useful when HTML blocks are known to be already hashed, like in the first
610	# whole-document pass.
611	#
612		foreach ($this->block_gamut as $method => $priority) {
613			$text = $this->$method($text);
614		}
615
616		# Finally form paragraph and restore hashed blocks.
617		$text = $this->formParagraphs($text);
618
619		return $text;
620	}
621
622
623	function doHorizontalRules($text) {
624		# Do Horizontal Rules:
625		return preg_replace(
626			'{
627				^[ ]{0,3}	# Leading space
628				([-*_])		# $1: First marker
629				(?>			# Repeated marker group
630					[ ]{0,2}	# Zero, one, or two spaces.
631					\1			# Marker character
632				){2,}		# Group repeated at least twice
633				[ ]*		# Tailing spaces
634				$			# End of line.
635			}mx',
636			"\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
637			$text);
638	}
639
640
641	var $span_gamut = array(
642	#
643	# These are all the transformations that occur *within* block-level
644	# tags like paragraphs, headers, and list items.
645	#
646		# Process character escapes, code spans, and inline HTML
647		# in one shot.
648		"parseSpan"           => -30,
649
650		# Process anchor and image tags. Images must come first,
651		# because ![foo][f] looks like an anchor.
652		"doImages"            =>  10,
653		"doAnchors"           =>  20,
654
655		# Make links out of things like `<http://example.com/>`
656		# Must come after doAnchors, because you can use < and >
657		# delimiters in inline links like [this](<url>).
658		"doAutoLinks"         =>  30,
659		"encodeAmpsAndAngles" =>  40,
660
661		"doItalicsAndBold"    =>  50,
662		"doHardBreaks"        =>  60,
663		);
664
665	function runSpanGamut($text) {
666	#
667	# Run span gamut tranformations.
668	#
669		foreach ($this->span_gamut as $method => $priority) {
670			$text = $this->$method($text);
671		}
672
673		return $text;
674	}
675
676
677	function doHardBreaks($text) {
678		# Do hard breaks:
679		return preg_replace_callback('/ {2,}\n/',
680			array(&$this, '_doHardBreaks_callback'), $text);
681	}
682	function _doHardBreaks_callback($matches) {
683		return $this->hashPart("<br$this->empty_element_suffix\n");
684	}
685
686
687	function doAnchors($text) {
688	#
689	# Turn Markdown link shortcuts into XHTML <a> tags.
690	#
691		if ($this->in_anchor) return $text;
692		$this->in_anchor = true;
693
694		#
695		# First, handle reference-style links: [link text] [id]
696		#
697		$text = preg_replace_callback('{
698			(					# wrap whole match in $1
699			  \[
700				('.$this->nested_brackets_re.')	# link text = $2
701			  \]
702
703			  [ ]?				# one optional space
704			  (?:\n[ ]*)?		# one optional newline followed by spaces
705
706			  \[
707				(.*?)		# id = $3
708			  \]
709			)
710			}xs',
711			array(&$this, '_doAnchors_reference_callback'), $text);
712
713		#
714		# Next, inline-style links: [link text](url "optional title")
715		#
716		$text = preg_replace_callback('{
717			(				# wrap whole match in $1
718			  \[
719				('.$this->nested_brackets_re.')	# link text = $2
720			  \]
721			  \(			# literal paren
722				[ \n]*
723				(?:
724					<(.+?)>	# href = $3
725				|
726					('.$this->nested_url_parenthesis_re.')	# href = $4
727				)
728				[ \n]*
729				(			# $5
730				  ([\'"])	# quote char = $6
731				  (.*?)		# Title = $7
732				  \6		# matching quote
733				  [ \n]*	# ignore any spaces/tabs between closing quote and )
734				)?			# title is optional
735			  \)
736			)
737			}xs',
738			array(&$this, '_doAnchors_inline_callback'), $text);
739
740		#
741		# Last, handle reference-style shortcuts: [link text]
742		# These must come last in case you've also got [link text][1]
743		# or [link text](/foo)
744		#
745		$text = preg_replace_callback('{
746			(					# wrap whole match in $1
747			  \[
748				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
749			  \]
750			)
751			}xs',
752			array(&$this, '_doAnchors_reference_callback'), $text);
753
754		$this->in_anchor = false;
755		return $text;
756	}
757	function _doAnchors_reference_callback($matches) {
758		$whole_match =  $matches[1];
759		$link_text   =  $matches[2];
760		$link_id     =& $matches[3];
761
762		if ($link_id == "") {
763			# for shortcut links like [this][] or [this].
764			$link_id = $link_text;
765		}
766
767		# lower-case and turn embedded newlines into spaces
768		$link_id = strtolower($link_id);
769		$link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
770
771		if (isset($this->urls[$link_id])) {
772			$url = $this->urls[$link_id];
773			$url = $this->encodeAttribute($url);
774
775			$result = "<a href=\"$url\"";
776			if ( isset( $this->titles[$link_id] ) ) {
777				$title = $this->titles[$link_id];
778				$title = $this->encodeAttribute($title);
779				$result .=  " title=\"$title\"";
780			}
781
782			$link_text = $this->runSpanGamut($link_text);
783			$result .= ">$link_text</a>";
784			$result = $this->hashPart($result);
785		}
786		else {
787			$result = $whole_match;
788		}
789		return $result;
790	}
791	function _doAnchors_inline_callback($matches) {
792		$whole_match	=  $matches[1];
793		$link_text		=  $this->runSpanGamut($matches[2]);
794		$url			=  $matches[3] == '' ? $matches[4] : $matches[3];
795		$title			=& $matches[7];
796
797		$url = $this->encodeAttribute($url);
798
799		$result = "<a href=\"$url\"";
800		if (isset($title)) {
801			$title = $this->encodeAttribute($title);
802			$result .=  " title=\"$title\"";
803		}
804
805		$link_text = $this->runSpanGamut($link_text);
806		$result .= ">$link_text</a>";
807
808		return $this->hashPart($result);
809	}
810
811
812	function doImages($text) {
813	#
814	# Turn Markdown image shortcuts into <img> tags.
815	#
816		#
817		# First, handle reference-style labeled images: ![alt text][id]
818		#
819		$text = preg_replace_callback('{
820			(				# wrap whole match in $1
821			  !\[
822				('.$this->nested_brackets_re.')		# alt text = $2
823			  \]
824
825			  [ ]?				# one optional space
826			  (?:\n[ ]*)?		# one optional newline followed by spaces
827
828			  \[
829				(.*?)		# id = $3
830			  \]
831
832			)
833			}xs',
834			array(&$this, '_doImages_reference_callback'), $text);
835
836		#
837		# Next, handle inline images:  ![alt text](url "optional title")
838		# Don't forget: encode * and _
839		#
840		$text = preg_replace_callback('{
841			(				# wrap whole match in $1
842			  !\[
843				('.$this->nested_brackets_re.')		# alt text = $2
844			  \]
845			  \s?			# One optional whitespace character
846			  \(			# literal paren
847				[ \n]*
848				(?:
849					<(\S*)>	# src url = $3
850				|
851					('.$this->nested_url_parenthesis_re.')	# src url = $4
852				)
853				[ \n]*
854				(			# $5
855				  ([\'"])	# quote char = $6
856				  (.*?)		# title = $7
857				  \6		# matching quote
858				  [ \n]*
859				)?			# title is optional
860			  \)
861			)
862			}xs',
863			array(&$this, '_doImages_inline_callback'), $text);
864
865		return $text;
866	}
867	function _doImages_reference_callback($matches) {
868		$whole_match = $matches[1];
869		$alt_text    = $matches[2];
870		$link_id     = strtolower($matches[3]);
871
872		if ($link_id == "") {
873			$link_id = strtolower($alt_text); # for shortcut links like ![this][].
874		}
875
876		$alt_text = $this->encodeAttribute($alt_text);
877		if (isset($this->urls[$link_id])) {
878			$url = $this->encodeAttribute($this->urls[$link_id]);
879			$result = "<img src=\"$url\" alt=\"$alt_text\"";
880			if (isset($this->titles[$link_id])) {
881				$title = $this->titles[$link_id];
882				$title = $this->encodeAttribute($title);
883				$result .=  " title=\"$title\"";
884			}
885			$result .= $this->empty_element_suffix;
886			$result = $this->hashPart($result);
887		}
888		else {
889			# If there's no such link ID, leave intact:
890			$result = $whole_match;
891		}
892
893		return $result;
894	}
895	function _doImages_inline_callback($matches) {
896		$whole_match	= $matches[1];
897		$alt_text		= $matches[2];
898		$url			= $matches[3] == '' ? $matches[4] : $matches[3];
899		$title			=& $matches[7];
900
901		$alt_text = $this->encodeAttribute($alt_text);
902		$url = $this->encodeAttribute($url);
903		$result = "<img src=\"$url\" alt=\"$alt_text\"";
904		if (isset($title)) {
905			$title = $this->encodeAttribute($title);
906			$result .=  " title=\"$title\""; # $title already quoted
907		}
908		$result .= $this->empty_element_suffix;
909
910		return $this->hashPart($result);
911	}
912
913
914	function doHeaders($text) {
915		# Setext-style headers:
916		#	  Header 1
917		#	  ========
918		#
919		#	  Header 2
920		#	  --------
921		#
922		$text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
923			array(&$this, '_doHeaders_callback_setext'), $text);
924
925		# atx-style headers:
926		#	# Header 1
927		#	## Header 2
928		#	## Header 2 with closing hashes ##
929		#	...
930		#	###### Header 6
931		#
932		$text = preg_replace_callback('{
933				^(\#{1,6})	# $1 = string of #\'s
934				[ ]*
935				(.+?)		# $2 = Header text
936				[ ]*
937				\#*			# optional closing #\'s (not counted)
938				\n+
939			}xm',
940			array(&$this, '_doHeaders_callback_atx'), $text);
941
942		return $text;
943	}
944	function _doHeaders_callback_setext($matches) {
945		# Terrible hack to check we haven't found an empty list item.
946		if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
947			return $matches[0];
948
949		$level = $matches[2]{0} == '=' ? 1 : 2;
950		$block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
951		return "\n" . $this->hashBlock($block) . "\n\n";
952	}
953	function _doHeaders_callback_atx($matches) {
954		$level = strlen($matches[1]);
955		$block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
956		return "\n" . $this->hashBlock($block) . "\n\n";
957	}
958
959
960	function doLists($text) {
961	#
962	# Form HTML ordered (numbered) and unordered (bulleted) lists.
963	#
964		$less_than_tab = $this->tab_width - 1;
965
966		# Re-usable patterns to match list item bullets and number markers:
967		$marker_ul_re  = '[*+-]';
968		$marker_ol_re  = '\d+[\.]';
969		$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
970
971		$markers_relist = array(
972			$marker_ul_re => $marker_ol_re,
973			$marker_ol_re => $marker_ul_re,
974			);
975
976		foreach ($markers_relist as $marker_re => $other_marker_re) {
977			# Re-usable pattern to match any entirel ul or ol list:
978			$whole_list_re = '
979				(								# $1 = whole list
980				  (								# $2
981					([ ]{0,'.$less_than_tab.'})	# $3 = number of spaces
982					('.$marker_re.')			# $4 = first list item marker
983					[ ]+
984				  )
985				  (?s:.+?)
986				  (								# $5
987					  \z
988					|
989					  \n{2,}
990					  (?=\S)
991					  (?!						# Negative lookahead for another list item marker
992						[ ]*
993						'.$marker_re.'[ ]+
994					  )
995					|
996					  (?=						# Lookahead for another kind of list
997					    \n
998						\3						# Must have the same indentation
999						'.$other_marker_re.'[ ]+
1000					  )
1001				  )
1002				)
1003			'; // mx
1004
1005			# We use a different prefix before nested lists than top-level lists.
1006			# See extended comment in _ProcessListItems().
1007
1008			if ($this->list_level) {
1009				$text = preg_replace_callback('{
1010						^
1011						'.$whole_list_re.'
1012					}mx',
1013					array(&$this, '_doLists_callback'), $text);
1014			}
1015			else {
1016				$text = preg_replace_callback('{
1017						(?:(?<=\n)\n|\A\n?) # Must eat the newline
1018						'.$whole_list_re.'
1019					}mx',
1020					array(&$this, '_doLists_callback'), $text);
1021			}
1022		}
1023
1024		return $text;
1025	}
1026	function _doLists_callback($matches) {
1027		# Re-usable patterns to match list item bullets and number markers:
1028		$marker_ul_re  = '[*+-]';
1029		$marker_ol_re  = '\d+[\.]';
1030		$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
1031
1032		$list = $matches[1];
1033		$list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
1034
1035		$marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
1036
1037		$list .= "\n";
1038		$result = $this->processListItems($list, $marker_any_re);
1039
1040		$result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1041		return "\n". $result ."\n\n";
1042	}
1043
1044	var $list_level = 0;
1045
1046	function processListItems($list_str, $marker_any_re) {
1047	#
1048	#	Process the contents of a single ordered or unordered list, splitting it
1049	#	into individual list items.
1050	#
1051		# The $this->list_level global keeps track of when we're inside a list.
1052		# Each time we enter a list, we increment it; when we leave a list,
1053		# we decrement. If it's zero, we're not in a list anymore.
1054		#
1055		# We do this because when we're not inside a list, we want to treat
1056		# something like this:
1057		#
1058		#		I recommend upgrading to version
1059		#		8. Oops, now this line is treated
1060		#		as a sub-list.
1061		#
1062		# As a single paragraph, despite the fact that the second line starts
1063		# with a digit-period-space sequence.
1064		#
1065		# Whereas when we're inside a list (or sub-list), that line will be
1066		# treated as the start of a sub-list. What a kludge, huh? This is
1067		# an aspect of Markdown's syntax that's hard to parse perfectly
1068		# without resorting to mind-reading. Perhaps the solution is to
1069		# change the syntax rules such that sub-lists must start with a
1070		# starting cardinal number; e.g. "1." or "a.".
1071
1072		$this->list_level++;
1073
1074		# trim trailing blank lines:
1075		$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1076
1077		$list_str = preg_replace_callback('{
1078			(\n)?							# leading line = $1
1079			(^[ ]*)							# leading whitespace = $2
1080			('.$marker_any_re.'				# list marker and space = $3
1081				(?:[ ]+|(?=\n))	# space only required if item is not empty
1082			)
1083			((?s:.*?))						# list item text   = $4
1084			(?:(\n+(?=\n))|\n)				# tailing blank line = $5
1085			(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1086			}xm',
1087			array(&$this, '_processListItems_callback'), $list_str);
1088
1089		$this->list_level--;
1090		return $list_str;
1091	}
1092	function _processListItems_callback($matches) {
1093		$item = $matches[4];
1094		$leading_line =& $matches[1];
1095		$leading_space =& $matches[2];
1096		$marker_space = $matches[3];
1097		$tailing_blank_line =& $matches[5];
1098
1099		if ($leading_line || $tailing_blank_line ||
1100			preg_match('/\n{2,}/', $item))
1101		{
1102			# Replace marker with the appropriate whitespace indentation
1103			$item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1104			$item = $this->runBlockGamut($this->outdent($item)."\n");
1105		}
1106		else {
1107			# Recursion for sub-lists:
1108			$item = $this->doLists($this->outdent($item));
1109			$item = preg_replace('/\n+$/', '', $item);
1110			$item = $this->runSpanGamut($item);
1111		}
1112
1113		return "<li>" . $item . "</li>\n";
1114	}
1115
1116
1117	function doCodeBlocks($text) {
1118	#
1119	#	Process Markdown `<pre><code>` blocks.
1120	#
1121		$text = preg_replace_callback('{
1122				(?:\n\n|\A\n?)
1123				(	            # $1 = the code block -- one or more lines, starting with a space/tab
1124				  (?>
1125					[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1126					.*\n+
1127				  )+
1128				)
1129				((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
1130			}xm',
1131			array(&$this, '_doCodeBlocks_callback'), $text);
1132
1133		return $text;
1134	}
1135	function _doCodeBlocks_callback($matches) {
1136		$codeblock = $matches[1];
1137
1138		$codeblock = $this->outdent($codeblock);
1139		$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1140
1141		# trim leading newlines and trailing newlines
1142		$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1143
1144		$codeblock = "<pre><code>$codeblock\n</code></pre>";
1145		return "\n\n".$this->hashBlock($codeblock)."\n\n";
1146	}
1147
1148
1149	function makeCodeSpan($code) {
1150	#
1151	# Create a code span markup for $code. Called from handleSpanToken.
1152	#
1153		$code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1154		return $this->hashPart("<code>$code</code>");
1155	}
1156
1157
1158	var $em_relist = array(
1159		''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
1160		'*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
1161		'_' => '(?<=\S|^)(?<!_)_(?!_)',
1162		);
1163	var $strong_relist = array(
1164		''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
1165		'**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
1166		'__' => '(?<=\S|^)(?<!_)__(?!_)',
1167		);
1168	var $em_strong_relist = array(
1169		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
1170		'***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
1171		'___' => '(?<=\S|^)(?<!_)___(?!_)',
1172		);
1173	var $em_strong_prepared_relist;
1174
1175	function prepareItalicsAndBold() {
1176	#
1177	# Prepare regular expressions for searching emphasis tokens in any
1178	# context.
1179	#
1180		foreach ($this->em_relist as $em => $em_re) {
1181			foreach ($this->strong_relist as $strong => $strong_re) {
1182				# Construct list of allowed token expressions.
1183				$token_relist = array();
1184				if (isset($this->em_strong_relist["$em$strong"])) {
1185					$token_relist[] = $this->em_strong_relist["$em$strong"];
1186				}
1187				$token_relist[] = $em_re;
1188				$token_relist[] = $strong_re;
1189
1190				# Construct master expression from list.
1191				$token_re = '{('. implode('|', $token_relist) .')}';
1192				$this->em_strong_prepared_relist["$em$strong"] = $token_re;
1193			}
1194		}
1195	}
1196
1197	function doItalicsAndBold($text) {
1198		$token_stack = array('');
1199		$text_stack = array('');
1200		$em = '';
1201		$strong = '';
1202		$tree_char_em = false;
1203
1204		while (1) {
1205			#
1206			# Get prepared regular expression for seraching emphasis tokens
1207			# in current context.
1208			#
1209			$token_re = $this->em_strong_prepared_relist["$em$strong"];
1210
1211			#
1212			# Each loop iteration search for the next emphasis token.
1213			# Each token is then passed to handleSpanToken.
1214			#
1215			$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1216			$text_stack[0] .= $parts[0];
1217			$token =& $parts[1];
1218			$text =& $parts[2];
1219
1220			if (empty($token)) {
1221				# Reached end of text span: empty stack without emitting.
1222				# any more emphasis.
1223				while ($token_stack[0]) {
1224					$text_stack[1] .= array_shift($token_stack);
1225					$text_stack[0] .= array_shift($text_stack);
1226				}
1227				break;
1228			}
1229
1230			$token_len = strlen($token);
1231			if ($tree_char_em) {
1232				# Reached closing marker while inside a three-char emphasis.
1233				if ($token_len == 3) {
1234					# Three-char closing marker, close em and strong.
1235					array_shift($token_stack);
1236					$span = array_shift($text_stack);
1237					$span = $this->runSpanGamut($span);
1238					$span = "<strong><em>$span</em></strong>";
1239					$text_stack[0] .= $this->hashPart($span);
1240					$em = '';
1241					$strong = '';
1242				} else {
1243					# Other closing marker: close one em or strong and
1244					# change current token state to match the other
1245					$token_stack[0] = str_repeat($token{0}, 3-$token_len);
1246					$tag = $token_len == 2 ? "strong" : "em";
1247					$span = $text_stack[0];
1248					$span = $this->runSpanGamut($span);
1249					$span = "<$tag>$span</$tag>";
1250					$text_stack[0] = $this->hashPart($span);
1251					$$tag = ''; # $$tag stands for $em or $strong
1252				}
1253				$tree_char_em = false;
1254			} else if ($token_len == 3) {
1255				if ($em) {
1256					# Reached closing marker for both em and strong.
1257					# Closing strong marker:
1258					for ($i = 0; $i < 2; ++$i) {
1259						$shifted_token = array_shift($token_stack);
1260						$tag = strlen($shifted_token) == 2 ? "strong" : "em";
1261						$span = array_shift($text_stack);
1262						$span = $this->runSpanGamut($span);
1263						$span = "<$tag>$span</$tag>";
1264						$text_stack[0] .= $this->hashPart($span);
1265						$$tag = ''; # $$tag stands for $em or $strong
1266					}
1267				} else {
1268					# Reached opening three-char emphasis marker. Push on token
1269					# stack; will be handled by the special condition above.
1270					$em = $token{0};
1271					$strong = "$em$em";
1272					array_unshift($token_stack, $token);
1273					array_unshift($text_stack, '');
1274					$tree_char_em = true;
1275				}
1276			} else if ($token_len == 2) {
1277				if ($strong) {
1278					# Unwind any dangling emphasis marker:
1279					if (strlen($token_stack[0]) == 1) {
1280						$text_stack[1] .= array_shift($token_stack);
1281						$text_stack[0] .= array_shift($text_stack);
1282					}
1283					# Closing strong marker:
1284					array_shift($token_stack);
1285					$span = array_shift($text_stack);
1286					$span = $this->runSpanGamut($span);
1287					$span = "<strong>$span</strong>";
1288					$text_stack[0] .= $this->hashPart($span);
1289					$strong = '';
1290				} else {
1291					array_unshift($token_stack, $token);
1292					array_unshift($text_stack, '');
1293					$strong = $token;
1294				}
1295			} else {
1296				# Here $token_len == 1
1297				if ($em) {
1298					if (strlen($token_stack[0]) == 1) {
1299						# Closing emphasis marker:
1300						array_shift($token_stack);
1301						$span = array_shift($text_stack);
1302						$span = $this->runSpanGamut($span);
1303						$span = "<em>$span</em>";
1304						$text_stack[0] .= $this->hashPart($span);
1305						$em = '';
1306					} else {
1307						$text_stack[0] .= $token;
1308					}
1309				} else {
1310					array_unshift($token_stack, $token);
1311					array_unshift($text_stack, '');
1312					$em = $token;
1313				}
1314			}
1315		}
1316		return $text_stack[0];
1317	}
1318
1319
1320	function doBlockQuotes($text) {
1321		$text = preg_replace_callback('/
1322			  (								# Wrap whole match in $1
1323				(?>
1324				  ^[ ]*>[ ]?			# ">" at the start of a line
1325					.+\n					# rest of the first line
1326				  (.+\n)*					# subsequent consecutive lines
1327				  \n*						# blanks
1328				)+
1329			  )
1330			/xm',
1331			array(&$this, '_doBlockQuotes_callback'), $text);
1332
1333		return $text;
1334	}
1335	function _doBlockQuotes_callback($matches) {
1336		$bq = $matches[1];
1337		# trim one level of quoting - trim whitespace-only lines
1338		$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1339		$bq = $this->runBlockGamut($bq);		# recurse
1340
1341		$bq = preg_replace('/^/m', "  ", $bq);
1342		# These leading spaces cause problem with <pre> content,
1343		# so we need to fix that:
1344		$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1345			array(&$this, '_doBlockQuotes_callback2'), $bq);
1346
1347		return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1348	}
1349	function _doBlockQuotes_callback2($matches) {
1350		$pre = $matches[1];
1351		$pre = preg_replace('/^  /m', '', $pre);
1352		return $pre;
1353	}
1354
1355
1356	function formParagraphs($text) {
1357	#
1358	#	Params:
1359	#		$text - string to process with html <p> tags
1360	#
1361		# Strip leading and trailing lines:
1362		$text = preg_replace('/\A\n+|\n+\z/', '', $text);
1363
1364		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1365
1366		#
1367		# Wrap <p> tags and unhashify HTML blocks
1368		#
1369		foreach ($grafs as $key => $value) {
1370			if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1371				# Is a paragraph.
1372				$value = $this->runSpanGamut($value);
1373				$value = preg_replace('/^([ ]*)/', "<p>", $value);
1374				$value .= "</p>";
1375				$grafs[$key] = $this->unhash($value);
1376			}
1377			else {
1378				# Is a block.
1379				# Modify elements of @grafs in-place...
1380				$graf = $value;
1381				$block = $this->html_hashes[$graf];
1382				$graf = $block;
1383//				if (preg_match('{
1384//					\A
1385//					(							# $1 = <div> tag
1386//					  <div  \s+
1387//					  [^>]*
1388//					  \b
1389//					  markdown\s*=\s*  ([\'"])	#	$2 = attr quote char
1390//					  1
1391//					  \2
1392//					  [^>]*
1393//					  >
1394//					)
1395//					(							# $3 = contents
1396//					.*
1397//					)
1398//					(</div>)					# $4 = closing tag
1399//					\z
1400//					}xs', $block, $matches))
1401//				{
1402//					list(, $div_open, , $div_content, $div_close) = $matches;
1403//
1404//					# We can't call Markdown(), because that resets the hash;
1405//					# that initialization code should be pulled into its own sub, though.
1406//					$div_content = $this->hashHTMLBlocks($div_content);
1407//
1408//					# Run document gamut methods on the content.
1409//					foreach ($this->document_gamut as $method => $priority) {
1410//						$div_content = $this->$method($div_content);
1411//					}
1412//
1413//					$div_open = preg_replace(
1414//						'{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1415//
1416//					$graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1417//				}
1418				$grafs[$key] = $graf;
1419			}
1420		}
1421
1422		return implode("\n\n", $grafs);
1423	}
1424
1425
1426	function encodeAttribute($text) {
1427	#
1428	# Encode text for a double-quoted HTML attribute. This function
1429	# is *not* suitable for attributes enclosed in single quotes.
1430	#
1431		$text = $this->encodeAmpsAndAngles($text);
1432		$text = str_replace('"', '&quot;', $text);
1433		return $text;
1434	}
1435
1436
1437	function encodeAmpsAndAngles($text) {
1438	#
1439	# Smart processing for ampersands and angle brackets that need to
1440	# be encoded. Valid character entities are left alone unless the
1441	# no-entities mode is set.
1442	#
1443		if ($this->no_entities) {
1444			$text = str_replace('&', '&amp;', $text);
1445		} else {
1446			# Ampersand-encoding based entirely on Nat Irons's Amputator
1447			# MT plugin: <http://bumppo.net/projects/amputator/>
1448			$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1449								'&amp;', $text);;
1450		}
1451		# Encode remaining <'s
1452		$text = str_replace('<', '&lt;', $text);
1453
1454		return $text;
1455	}
1456
1457
1458	function doAutoLinks($text) {
1459		$text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
1460			array(&$this, '_doAutoLinks_url_callback'), $text);
1461
1462		# Email addresses: <address@domain.foo>
1463		$text = preg_replace_callback('{
1464			<
1465			(?:mailto:)?
1466			(
1467				(?:
1468					[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1469				|
1470					".*?"
1471				)
1472				\@
1473				(?:
1474					[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1475				|
1476					\[[\d.a-fA-F:]+\]	# IPv4 & IPv6
1477				)
1478			)
1479			>
1480			}xi',
1481			array(&$this, '_doAutoLinks_email_callback'), $text);
1482		$text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array(&$this, '_doAutoLinks_tel_callback'), $text);
1483
1484		return $text;
1485	}
1486	function _doAutoLinks_tel_callback($matches) {
1487		$url = $this->encodeAttribute($matches[1]);
1488		$tel = $this->encodeAttribute($matches[2]);
1489		$link = "<a href=\"$url\">$tel</a>";
1490		return $this->hashPart($link);
1491	}
1492	function _doAutoLinks_url_callback($matches) {
1493		$url = $this->encodeAttribute($matches[1]);
1494		$link = "<a href=\"$url\">$url</a>";
1495		return $this->hashPart($link);
1496	}
1497	function _doAutoLinks_email_callback($matches) {
1498		$address = $matches[1];
1499		$link = $this->encodeEmailAddress($address);
1500		return $this->hashPart($link);
1501	}
1502
1503
1504	function encodeEmailAddress($addr) {
1505	#
1506	#	Input: an email address, e.g. "foo@example.com"
1507	#
1508	#	Output: the email address as a mailto link, with each character
1509	#		of the address encoded as either a decimal or hex entity, in
1510	#		the hopes of foiling most address harvesting spam bots. E.g.:
1511	#
1512	#	  <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1513	#        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1514	#        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1515	#        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1516	#
1517	#	Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1518	#   With some optimizations by Milian Wolff.
1519	#
1520		$addr = "mailto:" . $addr;
1521		$chars = preg_split('/(?<!^)(?!$)/', $addr);
1522		$seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1523
1524		foreach ($chars as $key => $char) {
1525			$ord = ord($char);
1526			# Ignore non-ascii chars.
1527			if ($ord < 128) {
1528				$r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1529				# roughly 10% raw, 45% hex, 45% dec
1530				# '@' *must* be encoded. I insist.
1531				if ($r > 90 && $char != '@') /* do nothing */;
1532				else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1533				else              $chars[$key] = '&#'.$ord.';';
1534			}
1535		}
1536
1537		$addr = implode('', $chars);
1538		$text = implode('', array_slice($chars, 7)); # text without `mailto:`
1539		$addr = "<a href=\"$addr\">$text</a>";
1540
1541		return $addr;
1542	}
1543
1544
1545	function parseSpan($str) {
1546	#
1547	# Take the string $str and parse it into tokens, hashing embeded HTML,
1548	# escaped characters and handling code spans.
1549	#
1550		$output = '';
1551
1552		$span_re = '{
1553				(
1554					\\\\'.$this->escape_chars_re.'
1555				|
1556					(?<![`\\\\])
1557					`+						# code span marker
1558			'.( $this->no_markup ? '' : '
1559				|
1560					<!--    .*?     -->		# comment
1561				|
1562					<\?.*?\?> | <%.*?%>		# processing instruction
1563				|
1564					<[!$]?[-a-zA-Z0-9:_]+	# regular tags
1565					(?>
1566						\s
1567						(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1568					)?
1569					>
1570				|
1571					<[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1572				|
1573					</[-a-zA-Z0-9:_]+\s*> # closing tag
1574			').'
1575				)
1576				}xs';
1577
1578		while (1) {
1579			#
1580			# Each loop iteration seach for either the next tag, the next
1581			# openning code span marker, or the next escaped character.
1582			# Each token is then passed to handleSpanToken.
1583			#
1584			$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1585
1586			# Create token from text preceding tag.
1587			if ($parts[0] != "") {
1588				$output .= $parts[0];
1589			}
1590
1591			# Check if we reach the end.
1592			if (isset($parts[1])) {
1593				$output .= $this->handleSpanToken($parts[1], $parts[2]);
1594				$str = $parts[2];
1595			}
1596			else {
1597				break;
1598			}
1599		}
1600
1601		return $output;
1602	}
1603
1604
1605	function handleSpanToken($token, &$str) {
1606	#
1607	# Handle $token provided by parseSpan by determining its nature and
1608	# returning the corresponding value that should replace it.
1609	#
1610		switch ($token{0}) {
1611			case "\\":
1612				return $this->hashPart("&#". ord($token{1}). ";");
1613			case "`":
1614				# Search for end marker in remaining text.
1615				if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1616					$str, $matches))
1617				{
1618					$str = $matches[2];
1619					$codespan = $this->makeCodeSpan($matches[1]);
1620					return $this->hashPart($codespan);
1621				}
1622				return $token; // return as text since no ending marker found.
1623			default:
1624				return $this->hashPart($token);
1625		}
1626	}
1627
1628
1629	function outdent($text) {
1630	#
1631	# Remove one level of line-leading tabs or spaces
1632	#
1633		return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1634	}
1635
1636
	# String length function for detab. `_initDetab` will create a function to
	# handle UTF-8 if the default function does not exist.
1639	var $utf8_strlen = 'mb_strlen';
1640
1641	function detab($text) {
1642	#
1643	# Replace tabs with the appropriate amount of space.
1644	#
1645		# For each line we separate the line in blocks delemited by
1646		# tab characters. Then we reconstruct every line by adding the
1647		# appropriate number of space between each blocks.
1648
1649		$text = preg_replace_callback('/^.*\t.*$/m',
1650			array(&$this, '_detab_callback'), $text);
1651
1652		return $text;
1653	}
1654	function _detab_callback($matches) {
1655		$line = $matches[0];
1656		$strlen = $this->utf8_strlen; # strlen function for UTF-8.
1657
1658		# Split in blocks.
1659		$blocks = explode("\t", $line);
1660		# Add each blocks to the line.
1661		$line = $blocks[0];
1662		unset($blocks[0]); # Do not add first block twice.
1663		foreach ($blocks as $block) {
1664			# Calculate amount of space, insert spaces, insert block.
1665			$amount = $this->tab_width -
1666				$strlen($line, 'UTF-8') % $this->tab_width;
1667			$line .= str_repeat(" ", $amount) . $block;
1668		}
1669		return $line;
1670	}
1671	function _initDetab() {
1672	#
1673	# Check for the availability of the function in the `utf8_strlen` property
1674	# (initially `mb_strlen`). If the function is not available, create a
1675	# function that will loosely count the number of UTF-8 characters with a
1676	# regular expression.
1677	#
1678		if (function_exists($this->utf8_strlen)) return;
1679		$this->utf8_strlen = create_function('$text', 'return preg_match_all(
1680			"/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
1681			$text, $m);');
1682	}
1683
1684
1685	function unhash($text) {
1686	#
1687	# Swap back in all the tags hashed by _HashHTMLBlocks.
1688	#
1689		return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1690			array(&$this, '_unhash_callback'), $text);
1691	}
1692	function _unhash_callback($matches) {
1693		return $this->html_hashes[$matches[0]];
1694	}
1695
1696}
1697
1698
1699#
1700# Markdown Extra Parser Class
1701#
1702
1703class MarkdownExtra_Parser extends Markdown_Parser {
1704
1705	### Configuration Variables ###
1706
1707	# Prefix for footnote ids.
1708	var $fn_id_prefix = "";
1709
1710	# Optional title attribute for footnote links and backlinks.
1711	var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
1712	var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
1713
1714	# Optional class attribute for footnote links and backlinks.
1715	var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
1716	var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
1717
1718	# Optional class prefix for fenced code block.
1719	var $code_class_prefix = MARKDOWN_CODE_CLASS_PREFIX;
1720	# Class attribute for code blocks goes on the `code` tag;
1721	# setting this to true will put attributes on the `pre` tag instead.
1722	var $code_attr_on_pre = MARKDOWN_CODE_ATTR_ON_PRE;
1723
1724	# Predefined abbreviations.
1725	var $predef_abbr = array();
1726
1727
1728	### Parser Implementation ###
1729
1730	function MarkdownExtra_Parser() {
1731	#
1732	# Constructor function. Initialize the parser object.
1733	#
1734		# Add extra escapable characters before parent constructor
1735		# initialize the table.
1736		$this->escape_chars .= ':|';
1737
1738		# Insert extra document, block, and span transformations.
1739		# Parent constructor will do the sorting.
1740		$this->document_gamut += array(
1741			"doFencedCodeBlocks" => 5,
1742			"stripFootnotes"     => 15,
1743			"stripAbbreviations" => 25,
1744			"appendFootnotes"    => 50,
1745			);
1746		$this->block_gamut += array(
1747			"doFencedCodeBlocks" => 5,
1748			"doTables"           => 15,
1749			"doDefLists"         => 45,
1750			);
1751		$this->span_gamut += array(
1752			"doFootnotes"        => 5,
1753			"doAbbreviations"    => 70,
1754			);
1755
1756		parent::Markdown_Parser();
1757	}
1758
1759
1760	# Extra variables used during extra transformations.
1761	var $footnotes = array();
1762	var $footnotes_ordered = array();
1763	var $footnotes_ref_count = array();
1764	var $footnotes_numbers = array();
1765	var $abbr_desciptions = array();
1766	var $abbr_word_re = '';
1767
1768	# Give the current footnote number.
1769	var $footnote_counter = 1;
1770
1771
1772	function setup() {
1773	#
1774	# Setting up Extra-specific variables.
1775	#
1776		parent::setup();
1777
1778		$this->footnotes = array();
1779		$this->footnotes_ordered = array();
1780		$this->footnotes_ref_count = array();
1781		$this->footnotes_numbers = array();
1782		$this->abbr_desciptions = array();
1783		$this->abbr_word_re = '';
1784		$this->footnote_counter = 1;
1785
1786		foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1787			if ($this->abbr_word_re)
1788				$this->abbr_word_re .= '|';
1789			$this->abbr_word_re .= preg_quote($abbr_word);
1790			$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1791		}
1792	}
1793
1794	function teardown() {
1795	#
1796	# Clearing Extra-specific variables.
1797	#
1798		$this->footnotes = array();
1799		$this->footnotes_ordered = array();
1800		$this->footnotes_ref_count = array();
1801		$this->footnotes_numbers = array();
1802		$this->abbr_desciptions = array();
1803		$this->abbr_word_re = '';
1804
1805		parent::teardown();
1806	}
1807
1808
1809	### Extra Attribute Parser ###
1810
1811	# Expression to use to catch attributes (includes the braces)
1812	var $id_class_attr_catch_re = '\{((?:[ ]*[#.][-_:a-zA-Z0-9]+){1,})[ ]*\}';
1813	# Expression to use when parsing in a context when no capture is desired
1814	var $id_class_attr_nocatch_re = '\{(?:[ ]*[#.][-_:a-zA-Z0-9]+){1,}[ ]*\}';
1815
1816	function doExtraAttributes($tag_name, $attr) {
1817	#
1818	# Parse attributes caught by the $this->id_class_attr_catch_re expression
1819	# and return the HTML-formatted list of attributes.
1820	#
1821	# Currently supported attributes are .class and #id.
1822	#
1823		if (empty($attr)) return "";
1824
1825		# Split on components
1826		preg_match_all('/[#.][-_:a-zA-Z0-9]+/', $attr, $matches);
1827		$elements = $matches[0];
1828
1829		# handle classes and ids (only first id taken into account)
1830		$classes = array();
1831		$id = false;
1832		foreach ($elements as $element) {
1833			if ($element{0} == '.') {
1834				$classes[] = substr($element, 1);
1835			} else if ($element{0} == '#') {
1836				if ($id === false) $id = substr($element, 1);
1837			}
1838		}
1839
1840		# compose attributes as string
1841		$attr_str = "";
1842		if (!empty($id)) {
1843			$attr_str .= ' id="'.$id.'"';
1844		}
1845		if (!empty($classes)) {
1846			$attr_str .= ' class="'.implode(" ", $classes).'"';
1847		}
1848		return $attr_str;
1849	}
1850
1851
1852	function stripLinkDefinitions($text) {
1853	#
1854	# Strips link definitions from text, stores the URLs and titles in
1855	# hash references.
1856	#
1857		$less_than_tab = $this->tab_width - 1;
1858
1859		# Link defs are in the form: ^[id]: url "optional title"
1860		$text = preg_replace_callback('{
1861							^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:	# id = $1
1862							  [ ]*
1863							  \n?				# maybe *one* newline
1864							  [ ]*
1865							(?:
1866							  <(.+?)>			# url = $2
1867							|
1868							  (\S+?)			# url = $3
1869							)
1870							  [ ]*
1871							  \n?				# maybe one newline
1872							  [ ]*
1873							(?:
1874								(?<=\s)			# lookbehind for whitespace
1875								["(]
1876								(.*?)			# title = $4
1877								[")]
1878								[ ]*
1879							)?	# title is optional
1880					(?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
1881							(?:\n+|\Z)
1882			}xm',
1883			array(&$this, '_stripLinkDefinitions_callback'),
1884			$text);
1885		return $text;
1886	}
1887	function _stripLinkDefinitions_callback($matches) {
1888		$link_id = strtolower($matches[1]);
1889		$url = $matches[2] == '' ? $matches[3] : $matches[2];
1890		$this->urls[$link_id] = $url;
1891		$this->titles[$link_id] =& $matches[4];
1892		$this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
1893		return ''; # String that will replace the block
1894	}
1895
1896
1897	### HTML Block Parser ###
1898
1899	# Tags that are always treated as block tags:
1900	var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption';
1901
1902	# Tags treated as block tags only if the opening tag is alone on its line:
1903	var $context_block_tags_re = 'script|noscript|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
1904
1905	# Tags where markdown="1" default to span mode:
1906	var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1907
1908	# Tags which must not have their contents modified, no matter where
1909	# they appear:
1910	var $clean_tags_re = 'script|math|svg';
1911
1912	# Tags that do not need to be closed.
1913	var $auto_close_tags_re = 'hr|img|param|source|track';
1914
1915
1916	function hashHTMLBlocks($text) {
1917	#
1918	# Hashify HTML Blocks and "clean tags".
1919	#
1920	# We only want to do this for block-level HTML tags, such as headers,
1921	# lists, and tables. That's because we still want to wrap <p>s around
1922	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1923	# phrase emphasis, and spans. The list of tags we're looking for is
1924	# hard-coded.
1925	#
1926	# This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1927	# _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
1928	# attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
1929	#  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1930	# These two functions are calling each other. It's recursive!
1931	#
1932		if ($this->no_markup)  return $text;
1933
1934		#
1935		# Call the HTML-in-Markdown hasher.
1936		#
1937		list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1938
1939		return $text;
1940	}
1941	function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1942										$enclosing_tag_re = '', $span = false)
1943	{
1944	#
1945	# Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1946	#
1947	# *   $indent is the number of space to be ignored when checking for code
1948	#     blocks. This is important because if we don't take the indent into
1949	#     account, something like this (which looks right) won't work as expected:
1950	#
1951	#     <div>
1952	#         <div markdown="1">
1953	#         Hello World.  <-- Is this a Markdown code block or text?
1954	#         </div>  <-- Is this a Markdown code block or a real tag?
1955	#     <div>
1956	#
1957	#     If you don't like this, just don't indent the tag on which
1958	#     you apply the markdown="1" attribute.
1959	#
1960	# *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
1961	#     tag with that name. Nested tags supported.
1962	#
1963	# *   If $span is true, text inside must treated as span. So any double
1964	#     newline will be replaced by a single newline so that it does not create
1965	#     paragraphs.
1966	#
1967	# Returns an array of that form: ( processed text , remaining text )
1968	#
1969		if ($text === '') return array('', '');
1970
1971		# Regex to check for the presense of newlines around a block tag.
1972		$newline_before_re = '/(?:^\n?|\n\n)*$/';
1973		$newline_after_re =
1974			'{
1975				^						# Start of text following the tag.
1976				(?>[ ]*<!--.*?-->)?		# Optional comment.
1977				[ ]*\n					# Must be followed by newline.
1978			}xs';
1979
1980		# Regex to match any tag.
1981		$block_tag_re =
1982			'{
1983				(					# $2: Capture whole tag.
1984					</?					# Any opening or closing tag.
1985						(?>				# Tag name.
1986							'.$this->block_tags_re.'			|
1987							'.$this->context_block_tags_re.'	|
1988							'.$this->clean_tags_re.'        	|
1989							(?!\s)'.$enclosing_tag_re.'
1990						)
1991						(?:
1992							(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
1993							(?>
1994								".*?"		|	# Double quotes (can contain `>`)
1995								\'.*?\'   	|	# Single quotes (can contain `>`)
1996								.+?				# Anything but quotes and `>`.
1997							)*?
1998						)?
1999					>					# End of tag.
2000				|
2001					<!--    .*?     -->	# HTML Comment
2002				|
2003					<\?.*?\?> | <%.*?%>	# Processing instruction
2004				|
2005					<!\[CDATA\[.*?\]\]>	# CData Block
2006				'. ( !$span ? ' # If not in span.
2007				|
2008					# Indented code block
2009					(?: ^[ ]*\n | ^ | \n[ ]*\n )
2010					[ ]{'.($indent+4).'}[^\n]* \n
2011					(?>
2012						(?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
2013					)*
2014				|
2015					# Fenced code block marker
2016					(?<= ^ | \n )
2017					[ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
2018									[ ]*
2019					(?:
2020					\.?[-_:a-zA-Z0-9]+ # standalone class name
2021					|
2022						'.$this->id_class_attr_nocatch_re.' # extra attributes
2023					)?
2024					[ ]*
2025					(?= \n )
2026				' : '' ). ' # End (if not is span).
2027				|
2028					# Code span marker
2029					# Note, this regex needs to go after backtick fenced
2030					# code blocks but it should also be kept outside of the
2031					# "if not in span" condition adding backticks to the parser
2032					`+
2033				)
2034			}xs';
2035
2036
2037		$depth = 0;		# Current depth inside the tag tree.
2038		$parsed = "";	# Parsed text that will be returned.
2039
2040		#
2041		# Loop through every tag until we find the closing tag of the parent
2042		# or loop until reaching the end of text if no parent tag specified.
2043		#
2044		do {
2045			#
2046			# Split the text using the first $tag_match pattern found.
2047			# Text before  pattern will be first in the array, text after
2048			# pattern will be at the end, and between will be any catches made
2049			# by the pattern.
2050			#
2051			$parts = preg_split($block_tag_re, $text, 2,
2052								PREG_SPLIT_DELIM_CAPTURE);
2053
2054			# If in Markdown span mode, add a empty-string span-level hash
2055			# after each newline to prevent triggering any block element.
2056			if ($span) {
2057				$void = $this->hashPart("", ':');
2058				$newline = "$void\n";
2059				$parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
2060			}
2061
2062			$parsed .= $parts[0]; # Text before current tag.
2063
2064			# If end of $text has been reached. Stop loop.
2065			if (count($parts) < 3) {
2066				$text = "";
2067				break;
2068			}
2069
2070			$tag  = $parts[1]; # Tag to handle.
2071			$text = $parts[2]; # Remaining text after current tag.
2072			$tag_re = preg_quote($tag); # For use in a regular expression.
2073
2074			#
2075			# Check for: Fenced code block marker.
2076			# Note: need to recheck the whole tag to disambiguate backtick
2077			# fences from code spans
2078			#
2079			if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
2080				# Fenced code block marker: find matching end marker.
2081				$fence_indent = strlen($capture[1]); # use captured indent in re
2082				$fence_re = $capture[2]; # use captured fence in re
2083				if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
2084					$matches))
2085				{
2086					# End marker found: pass text unchanged until marker.
2087					$parsed .= $tag . $matches[0];
2088					$text = substr($text, strlen($matches[0]));
2089				}
2090				else {
2091					# No end marker: just skip it.
2092					$parsed .= $tag;
2093				}
2094			}
2095			#
2096			# Check for: Indented code block.
2097			#
2098			else if ($tag{0} == "\n" || $tag{0} == " ") {
2099				# Indented code block: pass it unchanged, will be handled
2100				# later.
2101				$parsed .= $tag;
2102			}
2103			#
2104			# Check for: Code span marker
2105			# Note: need to check this after backtick fenced code blocks
2106			#
2107			else if ($tag{0} == "`") {
2108				# Find corresponding end marker.
2109				$tag_re = preg_quote($tag);
2110				if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
2111					$text, $matches))
2112				{
2113					# End marker found: pass text unchanged until marker.
2114					$parsed .= $tag . $matches[0];
2115					$text = substr($text, strlen($matches[0]));
2116				}
2117				else {
2118					# Unmatched marker: just skip it.
2119					$parsed .= $tag;
2120				}
2121			}
2122			#
2123			# Check for: Opening Block level tag or
2124			#            Opening Context Block tag (like ins and del)
		#               used as a block tag (tag is alone on its line).
2126			#
2127			else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
2128				(	preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
2129					preg_match($newline_before_re, $parsed) &&
2130					preg_match($newline_after_re, $text)	)
2131				)
2132			{
2133				# Need to parse tag and following text using the HTML parser.
2134				list($block_text, $text) =
2135					$this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
2136
2137				# Make sure it stays outside of any paragraph by adding newlines.
2138				$parsed .= "\n\n$block_text\n\n";
2139			}
2140			#
2141			# Check for: Clean tag (like script, math)
2142			#            HTML Comments, processing instructions.
2143			#
2144			else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
2145				$tag{1} == '!' || $tag{1} == '?')
2146			{
2147				# Need to parse tag and following text using the HTML parser.
2148				# (don't check for markdown attribute)
2149				list($block_text, $text) =
2150					$this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
2151
2152				$parsed .= $block_text;
2153			}
2154			#
2155			# Check for: Tag with same name as enclosing tag.
2156			#
2157			else if ($enclosing_tag_re !== '' &&
2158				# Same name as enclosing tag.
2159				preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
2160			{
2161				#
2162				# Increase/decrease nested tag count.
2163				#
2164				if ($tag{1} == '/')						$depth--;
2165				else if ($tag{strlen($tag)-2} != '/')	$depth++;
2166
2167				if ($depth < 0) {
2168					#
2169					# Going out of parent element. Clean up and break so we
2170					# return to the calling function.
2171					#
2172					$text = $tag . $text;
2173					break;
2174				}
2175
2176				$parsed .= $tag;
2177			}
2178			else {
2179				$parsed .= $tag;
2180			}
2181		} while ($depth >= 0);
2182
2183		return array($parsed, $text);
2184	}
2185	function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
2186	#
2187	# Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
2188	#
2189	# *   Calls $hash_method to convert any blocks.
2190	# *   Stops when the first opening tag closes.
2191	# *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
2192	#     (it is not inside clean tags)
2193	#
2194	# Returns an array of that form: ( processed text , remaining text )
2195	#
2196		if ($text === '') return array('', '');
2197
2198		# Regex to match `markdown` attribute inside of a tag.
2199		$markdown_attr_re = '
2200			{
2201				\s*			# Eat whitespace before the `markdown` attribute
2202				markdown
2203				\s*=\s*
2204				(?>
2205					(["\'])		# $1: quote delimiter
2206					(.*?)		# $2: attribute value
2207					\1			# matching delimiter
2208				|
2209					([^\s>]*)	# $3: unquoted attribute value
2210				)
2211				()				# $4: make $3 always defined (avoid warnings)
2212			}xs';
2213
2214		# Regex to match any tag.
2215		$tag_re = '{
2216				(					# $2: Capture whole tag.
2217					</?					# Any opening or closing tag.
2218						[\w:$]+			# Tag name.
2219						(?:
2220							(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
2221							(?>
2222								".*?"		|	# Double quotes (can contain `>`)
2223								\'.*?\'   	|	# Single quotes (can contain `>`)
2224								.+?				# Anything but quotes and `>`.
2225							)*?
2226						)?
2227					>					# End of tag.
2228				|
2229					<!--    .*?     -->	# HTML Comment
2230				|
2231					<\?.*?\?> | <%.*?%>	# Processing instruction
2232				|
2233					<!\[CDATA\[.*?\]\]>	# CData Block
2234				)
2235			}xs';
2236
2237		$original_text = $text;		# Save original text in case of faliure.
2238
2239		$depth		= 0;	# Current depth inside the tag tree.
2240		$block_text	= "";	# Temporary text holder for current text.
2241		$parsed		= "";	# Parsed text that will be returned.
2242
2243		#
2244		# Get the name of the starting tag.
2245		# (This pattern makes $base_tag_name_re safe without quoting.)
2246		#
2247		if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
2248			$base_tag_name_re = $matches[1];
2249
2250		#
2251		# Loop through every tag until we find the corresponding closing tag.
2252		#
2253		do {
2254			#
2255			# Split the text using the first $tag_match pattern found.
2256			# Text before  pattern will be first in the array, text after
2257			# pattern will be at the end, and between will be any catches made
2258			# by the pattern.
2259			#
2260			$parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
2261
2262			if (count($parts) < 3) {
2263				#
2264				# End of $text reached with unbalenced tag(s).
2265				# In that case, we return original text unchanged and pass the
2266				# first character as filtered to prevent an infinite loop in the
2267				# parent function.
2268				#
2269				return array($original_text{0}, substr($original_text, 1));
2270			}
2271
2272			$block_text .= $parts[0]; # Text before current tag.
2273			$tag         = $parts[1]; # Tag to handle.
2274			$text        = $parts[2]; # Remaining text after current tag.
2275
2276			#
2277			# Check for: Auto-close tag (like <hr/>)
2278			#			 Comments and Processing Instructions.
2279			#
2280			if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
2281				$tag{1} == '!' || $tag{1} == '?')
2282			{
2283				# Just add the tag to the block as if it was text.
2284				$block_text .= $tag;
2285			}
2286			else {
2287				#
2288				# Increase/decrease nested tag count. Only do so if
2289				# the tag's name match base tag's.
2290				#
2291				if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
2292					if ($tag{1} == '/')						$depth--;
2293					else if ($tag{strlen($tag)-2} != '/')	$depth++;
2294				}
2295
2296				#
2297				# Check for `markdown="1"` attribute and handle it.
2298				#
2299				if ($md_attr &&
2300					preg_match($markdown_attr_re, $tag, $attr_m) &&
2301					preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2302				{
2303					# Remove `markdown` attribute from opening tag.
2304					$tag = preg_replace($markdown_attr_re, '', $tag);
2305
2306					# Check if text inside this tag must be parsed in span mode.
2307					$this->mode = $attr_m[2] . $attr_m[3];
2308					$span_mode = $this->mode == 'span' || $this->mode != 'block' &&
2309						preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
2310
2311					# Calculate indent before tag.
2312					if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
2313						$strlen = $this->utf8_strlen;
2314						$indent = $strlen($matches[1], 'UTF-8');
2315					} else {
2316						$indent = 0;
2317					}
2318
2319					# End preceding block with this tag.
2320					$block_text .= $tag;
2321					$parsed .= $this->$hash_method($block_text);
2322
2323					# Get enclosing tag name for the ParseMarkdown function.
2324					# (This pattern makes $tag_name_re safe without quoting.)
2325					preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2326					$tag_name_re = $matches[1];
2327
2328					# Parse the content using the HTML-in-Markdown parser.
2329					list ($block_text, $text)
2330						= $this->_hashHTMLBlocks_inMarkdown($text, $indent,
2331							$tag_name_re, $span_mode);
2332
2333					# Outdent markdown text.
2334					if ($indent > 0) {
2335						$block_text = preg_replace("/^[ ]{1,$indent}/m", "",
2336													$block_text);
2337					}
2338
2339					# Append tag content to parsed text.
2340					if (!$span_mode)	$parsed .= "\n\n$block_text\n\n";
2341					else				$parsed .= "$block_text";
2342
2343					# Start over with a new block.
2344					$block_text = "";
2345				}
2346				else $block_text .= $tag;
2347			}
2348
2349		} while ($depth > 0);
2350
2351		#
2352		# Hash last block text that wasn't processed inside the loop.
2353		#
2354		$parsed .= $this->$hash_method($block_text);
2355
2356		return array($parsed, $text);
2357	}
2358
2359
2360	function hashClean($text) {
2361	#
2362	# Called whenever a tag must be hashed when a function inserts a "clean" tag
2363	# in $text, it passes through this function and is automaticaly escaped,
2364	# blocking invalid nested overlap.
2365	#
2366		return $this->hashPart($text, 'C');
2367	}
2368
2369
2370	function doAnchors($text) {
2371	#
2372	# Turn Markdown link shortcuts into XHTML <a> tags.
2373	#
2374		if ($this->in_anchor) return $text;
2375		$this->in_anchor = true;
2376
2377		#
2378		# First, handle reference-style links: [link text] [id]
2379		#
2380		$text = preg_replace_callback('{
2381			(					# wrap whole match in $1
2382			  \[
2383				('.$this->nested_brackets_re.')	# link text = $2
2384			  \]
2385
2386			  [ ]?				# one optional space
2387			  (?:\n[ ]*)?		# one optional newline followed by spaces
2388
2389			  \[
2390				(.*?)		# id = $3
2391			  \]
2392			)
2393			}xs',
2394			array(&$this, '_doAnchors_reference_callback'), $text);
2395
2396		#
2397		# Next, inline-style links: [link text](url "optional title")
2398		#
2399		$text = preg_replace_callback('{
2400			(				# wrap whole match in $1
2401			  \[
2402				('.$this->nested_brackets_re.')	# link text = $2
2403			  \]
2404			  \(			# literal paren
2405				[ \n]*
2406				(?:
2407					<(.+?)>	# href = $3
2408				|
2409					('.$this->nested_url_parenthesis_re.')	# href = $4
2410				)
2411				[ \n]*
2412				(			# $5
2413				  ([\'"])	# quote char = $6
2414				  (.*?)		# Title = $7
2415				  \6		# matching quote
2416				  [ \n]*	# ignore any spaces/tabs between closing quote and )
2417				)?			# title is optional
2418			  \)
2419			  (?:[ ]? '.$this->id_class_attr_catch_re.' )?	 # $8 = id/class attributes
2420			)
2421			}xs',
2422			array(&$this, '_doAnchors_inline_callback'), $text);
2423
2424		#
2425		# Last, handle reference-style shortcuts: [link text]
2426		# These must come last in case you've also got [link text][1]
2427		# or [link text](/foo)
2428		#
2429		$text = preg_replace_callback('{
2430			(					# wrap whole match in $1
2431			  \[
2432				([^\[\]]+)		# link text = $2; can\'t contain [ or ]
2433			  \]
2434			)
2435			}xs',
2436			array(&$this, '_doAnchors_reference_callback'), $text);
2437
2438		$this->in_anchor = false;
2439		return $text;
2440	}
2441	function _doAnchors_reference_callback($matches) {
2442		$whole_match =  $matches[1];
2443		$link_text   =  $matches[2];
2444		$link_id     =& $matches[3];
2445
2446		if ($link_id == "") {
2447			# for shortcut links like [this][] or [this].
2448			$link_id = $link_text;
2449		}
2450
2451		# lower-case and turn embedded newlines into spaces
2452		$link_id = strtolower($link_id);
2453		$link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
2454
2455		if (isset($this->urls[$link_id])) {
2456			$url = $this->urls[$link_id];
2457			$url = $this->encodeAttribute($url);
2458
2459			$result = "<a href=\"$url\"";
2460			if ( isset( $this->titles[$link_id] ) ) {
2461				$title = $this->titles[$link_id];
2462				$title = $this->encodeAttribute($title);
2463				$result .=  " title=\"$title\"";
2464			}
2465			if (isset($this->ref_attr[$link_id]))
2466				$result .= $this->ref_attr[$link_id];
2467
2468			$link_text = $this->runSpanGamut($link_text);
2469			$result .= ">$link_text</a>";
2470			$result = $this->hashPart($result);
2471		}
2472		else {
2473			$result = $whole_match;
2474		}
2475		return $result;
2476	}
2477	function _doAnchors_inline_callback($matches) {
2478		$whole_match	=  $matches[1];
2479		$link_text		=  $this->runSpanGamut($matches[2]);
2480		$url			=  $matches[3] == '' ? $matches[4] : $matches[3];
2481		$title			=& $matches[7];
2482		$attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
2483
2484
2485		$url = $this->encodeAttribute($url);
2486
2487		$result = "<a href=\"$url\"";
2488		if (isset($title)) {
2489			$title = $this->encodeAttribute($title);
2490			$result .=  " title=\"$title\"";
2491		}
2492		$result .= $attr;
2493
2494		$link_text = $this->runSpanGamut($link_text);
2495		$result .= ">$link_text</a>";
2496
2497		return $this->hashPart($result);
2498	}
2499
2500
2501	function doImages($text) {
2502	#
2503	# Turn Markdown image shortcuts into <img> tags.
2504	#
2505		#
2506		# First, handle reference-style labeled images: ![alt text][id]
2507		#
2508		$text = preg_replace_callback('{
2509			(				# wrap whole match in $1
2510			  !\[
2511				('.$this->nested_brackets_re.')		# alt text = $2
2512			  \]
2513
2514			  [ ]?				# one optional space
2515			  (?:\n[ ]*)?		# one optional newline followed by spaces
2516
2517			  \[
2518				(.*?)		# id = $3
2519			  \]
2520
2521			)
2522			}xs',
2523			array(&$this, '_doImages_reference_callback'), $text);
2524
2525		#
2526		# Next, handle inline images:  ![alt text](url "optional title")
2527		# Don't forget: encode * and _
2528		#
2529		$text = preg_replace_callback('{
2530			(				# wrap whole match in $1
2531			  !\[
2532				('.$this->nested_brackets_re.')		# alt text = $2
2533			  \]
2534			  \s?			# One optional whitespace character
2535			  \(			# literal paren
2536				[ \n]*
2537				(?:
2538					<(\S*)>	# src url = $3
2539				|
2540					('.$this->nested_url_parenthesis_re.')	# src url = $4
2541				)
2542				[ \n]*
2543				(			# $5
2544				  ([\'"])	# quote char = $6
2545				  (.*?)		# title = $7
2546				  \6		# matching quote
2547				  [ \n]*
2548				)?			# title is optional
2549			  \)
2550			  (?:[ ]? '.$this->id_class_attr_catch_re.' )?	 # $8 = id/class attributes
2551			)
2552			}xs',
2553			array(&$this, '_doImages_inline_callback'), $text);
2554
2555		return $text;
2556	}
2557	function _doImages_reference_callback($matches) {
2558		$whole_match = $matches[1];
2559		$alt_text    = $matches[2];
2560		$link_id     = strtolower($matches[3]);
2561
2562		if ($link_id == "") {
2563			$link_id = strtolower($alt_text); # for shortcut links like ![this][].
2564		}
2565
2566		$alt_text = $this->encodeAttribute($alt_text);
2567		if (isset($this->urls[$link_id])) {
2568			$url = $this->encodeAttribute($this->urls[$link_id]);
2569			$result = "<img src=\"$url\" alt=\"$alt_text\"";
2570			if (isset($this->titles[$link_id])) {
2571				$title = $this->titles[$link_id];
2572				$title = $this->encodeAttribute($title);
2573				$result .=  " title=\"$title\"";
2574			}
2575			if (isset($this->ref_attr[$link_id]))
2576				$result .= $this->ref_attr[$link_id];
2577			$result .= $this->empty_element_suffix;
2578			$result = $this->hashPart($result);
2579		}
2580		else {
2581			# If there's no such link ID, leave intact:
2582			$result = $whole_match;
2583		}
2584
2585		return $result;
2586	}
2587	function _doImages_inline_callback($matches) {
2588		$whole_match	= $matches[1];
2589		$alt_text		= $matches[2];
2590		$url			= $matches[3] == '' ? $matches[4] : $matches[3];
2591		$title			=& $matches[7];
2592		$attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
2593
2594		$alt_text = $this->encodeAttribute($alt_text);
2595		$url = $this->encodeAttribute($url);
2596		$result = "<img src=\"$url\" alt=\"$alt_text\"";
2597		if (isset($title)) {
2598			$title = $this->encodeAttribute($title);
2599			$result .=  " title=\"$title\""; # $title already quoted
2600		}
2601		$result .= $attr;
2602		$result .= $this->empty_element_suffix;
2603
2604		return $this->hashPart($result);
2605	}
2606
2607
2608	function doHeaders($text) {
2609	#
2610	# Redefined to add id and class attribute support.
2611	#
2612		# Setext-style headers:
2613		#	  Header 1  {#header1}
2614		#	  ========
2615		#
2616		#	  Header 2  {#header2 .class1 .class2}
2617		#	  --------
2618		#
2619		$text = preg_replace_callback(
2620			'{
2621				(^.+?)								# $1: Header text
2622				(?:[ ]+ '.$this->id_class_attr_catch_re.' )?	 # $3 = id/class attributes
2623				[ ]*\n(=+|-+)[ ]*\n+				# $3: Header footer
2624			}mx',
2625			array(&$this, '_doHeaders_callback_setext'), $text);
2626
2627		# atx-style headers:
2628		#	# Header 1        {#header1}
2629		#	## Header 2       {#header2}
2630		#	## Header 2 with closing hashes ##  {#header3.class1.class2}
2631		#	...
2632		#	###### Header 6   {.class2}
2633		#
2634		$text = preg_replace_callback('{
2635				^(\#{1,6})	# $1 = string of #\'s
2636				[ ]*
2637				(.+?)		# $2 = Header text
2638				[ ]*
2639				\#*			# optional closing #\'s (not counted)
2640				(?:[ ]+ '.$this->id_class_attr_catch_re.' )?	 # $3 = id/class attributes
2641				[ ]*
2642				\n+
2643			}xm',
2644			array(&$this, '_doHeaders_callback_atx'), $text);
2645
2646		return $text;
2647	}
2648	function _doHeaders_callback_setext($matches) {
2649		if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
2650			return $matches[0];
2651		$level = $matches[3]{0} == '=' ? 1 : 2;
2652		$attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2]);
2653		$block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
2654		return "\n" . $this->hashBlock($block) . "\n\n";
2655	}
2656	function _doHeaders_callback_atx($matches) {
2657		$level = strlen($matches[1]);
2658		$attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3]);
2659		$block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2660		return "\n" . $this->hashBlock($block) . "\n\n";
2661	}
2662
2663
2664	function doTables($text) {
2665	#
2666	# Form HTML tables.
2667	#
2668		$less_than_tab = $this->tab_width - 1;
2669		#
2670		# Find tables with leading pipe.
2671		#
2672		#	| Header 1 | Header 2
2673		#	| -------- | --------
2674		#	| Cell 1   | Cell 2
2675		#	| Cell 3   | Cell 4
2676		#
2677		$text = preg_replace_callback('
2678			{
2679				^							# Start of a line
2680				[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
2681				[|]							# Optional leading pipe (present)
2682				(.+) \n						# $1: Header row (at least one pipe)
2683
2684				[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
2685				[|] ([ ]*[-:]+[-| :]*) \n	# $2: Header underline
2686
2687				(							# $3: Cells
2688					(?>
2689						[ ]*				# Allowed whitespace.
2690						[|] .* \n			# Row content.
2691					)*
2692				)
2693				(?=\n|\Z)					# Stop at final double newline.
2694			}xm',
2695			array(&$this, '_doTable_leadingPipe_callback'), $text);
2696
2697		#
2698		# Find tables without leading pipe.
2699		#
2700		#	Header 1 | Header 2
2701		#	-------- | --------
2702		#	Cell 1   | Cell 2
2703		#	Cell 3   | Cell 4
2704		#
2705		$text = preg_replace_callback('
2706			{
2707				^							# Start of a line
2708				[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
2709				(\S.*[|].*) \n				# $1: Header row (at least one pipe)
2710
2711				[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
2712				([-:]+[ ]*[|][-| :]*) \n	# $2: Header underline
2713
2714				(							# $3: Cells
2715					(?>
2716						.* [|] .* \n		# Row content
2717					)*
2718				)
2719				(?=\n|\Z)					# Stop at final double newline.
2720			}xm',
2721			array(&$this, '_DoTable_callback'), $text);
2722
2723		return $text;
2724	}
2725	function _doTable_leadingPipe_callback($matches) {
2726		$head		= $matches[1];
2727		$underline	= $matches[2];
2728		$content	= $matches[3];
2729
2730		# Remove leading pipe for each row.
2731		$content	= preg_replace('/^ *[|]/m', '', $content);
2732
2733		return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2734	}
2735	function _doTable_callback($matches) {
2736		$head		= $matches[1];
2737		$underline	= $matches[2];
2738		$content	= $matches[3];
2739
2740		# Remove any tailing pipes for each line.
2741		$head		= preg_replace('/[|] *$/m', '', $head);
2742		$underline	= preg_replace('/[|] *$/m', '', $underline);
2743		$content	= preg_replace('/[|] *$/m', '', $content);
2744
2745		# Reading alignement from header underline.
2746		$separators	= preg_split('/ *[|] */', $underline);
2747		foreach ($separators as $n => $s) {
2748			if (preg_match('/^ *-+: *$/', $s))		$attr[$n] = ' align="right"';
2749			else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
2750			else if (preg_match('/^ *:-+ *$/', $s))	$attr[$n] = ' align="left"';
2751			else									$attr[$n] = '';
2752		}
2753
2754		# Parsing span elements, including code spans, character escapes,
2755		# and inline HTML tags, so that pipes inside those gets ignored.
2756		$head		= $this->parseSpan($head);
2757		$headers	= preg_split('/ *[|] */', $head);
2758		$col_count	= count($headers);
2759		$attr       = array_pad($attr, $col_count, '');
2760
2761		# Write column headers.
2762		$text = "<table>\n";
2763		$text .= "<thead>\n";
2764		$text .= "<tr>\n";
2765		foreach ($headers as $n => $header)
2766			$text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2767		$text .= "</tr>\n";
2768		$text .= "</thead>\n";
2769
2770		# Split content by row.
2771		$rows = explode("\n", trim($content, "\n"));
2772
2773		$text .= "<tbody>\n";
2774		foreach ($rows as $row) {
2775			# Parsing span elements, including code spans, character escapes,
2776			# and inline HTML tags, so that pipes inside those gets ignored.
2777			$row = $this->parseSpan($row);
2778
2779			# Split row by cell.
2780			$row_cells = preg_split('/ *[|] */', $row, $col_count);
2781			$row_cells = array_pad($row_cells, $col_count, '');
2782
2783			$text .= "<tr>\n";
2784			foreach ($row_cells as $n => $cell)
2785				$text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2786			$text .= "</tr>\n";
2787		}
2788		$text .= "</tbody>\n";
2789		$text .= "</table>";
2790
2791		return $this->hashBlock($text) . "\n";
2792	}
2793
2794
2795	function doDefLists($text) {
2796	#
2797	# Form HTML definition lists.
2798	#
2799		$less_than_tab = $this->tab_width - 1;
2800
2801		# Re-usable pattern to match any entire dl list:
2802		$whole_list_re = '(?>
2803			(								# $1 = whole list
2804			  (								# $2
2805				[ ]{0,'.$less_than_tab.'}
2806				((?>.*\S.*\n)+)				# $3 = defined term
2807				\n?
2808				[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2809			  )
2810			  (?s:.+?)
2811			  (								# $4
2812				  \z
2813				|
2814				  \n{2,}
2815				  (?=\S)
2816				  (?!						# Negative lookahead for another term
2817					[ ]{0,'.$less_than_tab.'}
2818					(?: \S.*\n )+?			# defined term
2819					\n?
2820					[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2821				  )
2822				  (?!						# Negative lookahead for another definition
2823					[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2824				  )
2825			  )
2826			)
2827		)'; // mx
2828
2829		$text = preg_replace_callback('{
2830				(?>\A\n?|(?<=\n\n))
2831				'.$whole_list_re.'
2832			}mx',
2833			array(&$this, '_doDefLists_callback'), $text);
2834
2835		return $text;
2836	}
2837	function _doDefLists_callback($matches) {
2838		# Re-usable patterns to match list item bullets and number markers:
2839		$list = $matches[1];
2840
2841		# Turn double returns into triple returns, so that we can make a
2842		# paragraph for the last item in a list, if necessary:
2843		$result = trim($this->processDefListItems($list));
2844		$result = "<dl>\n" . $result . "\n</dl>";
2845		return $this->hashBlock($result) . "\n\n";
2846	}
2847
2848
2849	function processDefListItems($list_str) {
2850	#
2851	#	Process the contents of a single definition list, splitting it
2852	#	into individual term and definition list items.
2853	#
2854		$less_than_tab = $this->tab_width - 1;
2855
2856		# trim trailing blank lines:
2857		$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2858
2859		# Process definition terms.
2860		$list_str = preg_replace_callback('{
2861			(?>\A\n?|\n\n+)					# leading line
2862			(								# definition terms = $1
2863				[ ]{0,'.$less_than_tab.'}	# leading whitespace
2864				(?!\:[ ]|[ ])				# negative lookahead for a definition
2865											#   mark (colon) or more whitespace.
2866				(?> \S.* \n)+?				# actual term (not whitespace).
2867			)
2868			(?=\n?[ ]{0,3}:[ ])				# lookahead for following line feed
2869											#   with a definition mark.
2870			}xm',
2871			array(&$this, '_processDefListItems_callback_dt'), $list_str);
2872
2873		# Process actual definitions.
2874		$list_str = preg_replace_callback('{
2875			\n(\n+)?						# leading line = $1
2876			(								# marker space = $2
2877				[ ]{0,'.$less_than_tab.'}	# whitespace before colon
2878				\:[ ]+						# definition mark (colon)
2879			)
2880			((?s:.+?))						# definition text = $3
2881			(?= \n+ 						# stop at next definition mark,
2882				(?:							# next term or end of text
2883					[ ]{0,'.$less_than_tab.'} \:[ ]	|
2884					<dt> | \z
2885				)
2886			)
2887			}xm',
2888			array(&$this, '_processDefListItems_callback_dd'), $list_str);
2889
2890		return $list_str;
2891	}
2892	function _processDefListItems_callback_dt($matches) {
2893		$terms = explode("\n", trim($matches[1]));
2894		$text = '';
2895		foreach ($terms as $term) {
2896			$term = $this->runSpanGamut(trim($term));
2897			$text .= "\n<dt>" . $term . "</dt>";
2898		}
2899		return $text . "\n";
2900	}
2901	function _processDefListItems_callback_dd($matches) {
2902		$leading_line	= $matches[1];
2903		$marker_space	= $matches[2];
2904		$def			= $matches[3];
2905
2906		if ($leading_line || preg_match('/\n{2,}/', $def)) {
2907			# Replace marker with the appropriate whitespace indentation
2908			$def = str_repeat(' ', strlen($marker_space)) . $def;
2909			$def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2910			$def = "\n". $def ."\n";
2911		}
2912		else {
2913			$def = rtrim($def);
2914			$def = $this->runSpanGamut($this->outdent($def));
2915		}
2916
2917		return "\n<dd>" . $def . "</dd>\n";
2918	}
2919
2920
2921	function doFencedCodeBlocks($text) {
2922	#
2923	# Adding the fenced code block syntax to regular Markdown:
2924	#
2925	# ~~~
2926	# Code block
2927	# ~~~
2928	#
2929		$less_than_tab = $this->tab_width;
2930
2931		$text = preg_replace_callback('{
2932				(?:\n|\A)
2933				# 1: Opening marker
2934				(
2935					(?:~{3,}|`{3,}) # 3 or more tildes/backticks.
2936				)
2937				[ ]*
2938				(?:
2939					\.?([-_:a-zA-Z0-9]+) # 2: standalone class name
2940				|
2941					'.$this->id_class_attr_catch_re.' # 3: Extra attributes
2942				)?
2943				[ ]* \n # Whitespace and newline following marker.
2944
2945				# 4: Content
2946				(
2947					(?>
2948						(?!\1 [ ]* \n)	# Not a closing marker.
2949						.*\n+
2950					)+
2951				)
2952
2953				# Closing marker.
2954				\1 [ ]* (?= \n )
2955			}xm',
2956			array(&$this, '_doFencedCodeBlocks_callback'), $text);
2957
2958		return $text;
2959	}
2960	function _doFencedCodeBlocks_callback($matches) {
2961		$classname =& $matches[2];
2962		$attrs     =& $matches[3];
2963		$codeblock = $matches[4];
2964		$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
2965		$codeblock = preg_replace_callback('/^\n+/',
2966			array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
2967
2968		if ($classname != "") {
2969			if ($classname{0} == '.')
2970				$classname = substr($classname, 1);
2971			$attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
2972		} else {
2973			$attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
2974		}
2975		$pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
2976		$code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
2977		$codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
2978
2979		return "\n\n".$this->hashBlock($codeblock)."\n\n";
2980	}
2981	function _doFencedCodeBlocks_newlines($matches) {
2982		return str_repeat("<br$this->empty_element_suffix",
2983			strlen($matches[0]));
2984	}
2985
2986
2987	#
2988	# Redefining emphasis markers so that emphasis by underscore does not
2989	# work in the middle of a word.
2990	#
2991	var $em_relist = array(
2992		''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)',
2993		'*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
2994		'_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
2995		);
2996	var $strong_relist = array(
2997		''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)',
2998		'**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
2999		'__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
3000		);
3001	var $em_strong_relist = array(
3002		''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)',
3003		'***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
3004		'___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
3005		);
3006
3007
3008	function formParagraphs($text) {
3009	#
3010	#	Params:
3011	#		$text - string to process with html <p> tags
3012	#
3013		# Strip leading and trailing lines:
3014		$text = preg_replace('/\A\n+|\n+\z/', '', $text);
3015
3016		$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
3017
3018		#
3019		# Wrap <p> tags and unhashify HTML blocks
3020		#
3021		foreach ($grafs as $key => $value) {
3022			$value = trim($this->runSpanGamut($value));
3023
3024			# Check if this should be enclosed in a paragraph.
3025			# Clean tag hashes & block tag hashes are left alone.
3026			$is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
3027
3028			if ($is_p) {
3029				$value = "<p>$value</p>";
3030			}
3031			$grafs[$key] = $value;
3032		}
3033
3034		# Join grafs in one text, then unhash HTML tags.
3035		$text = implode("\n\n", $grafs);
3036
3037		# Finish by removing any tag hashes still present in $text.
3038		$text = $this->unhash($text);
3039
3040		return $text;
3041	}
3042
3043
3044	### Footnotes
3045
3046	function stripFootnotes($text) {
3047	#
3048	# Strips link definitions from text, stores the URLs and titles in
3049	# hash references.
3050	#
3051		$less_than_tab = $this->tab_width - 1;
3052
3053		# Link defs are in the form: [^id]: url "optional title"
3054		$text = preg_replace_callback('{
3055			^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:	# note_id = $1
3056			  [ ]*
3057			  \n?					# maybe *one* newline
3058			(						# text = $2 (no blank lines allowed)
3059				(?:
3060					.+				# actual text
3061				|
3062					\n				# newlines but
3063					(?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
3064					(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
3065									# by non-indented content
3066				)*
3067			)
3068			}xm',
3069			array(&$this, '_stripFootnotes_callback'),
3070			$text);
3071		return $text;
3072	}
3073	function _stripFootnotes_callback($matches) {
3074		$note_id = $this->fn_id_prefix . $matches[1];
3075		$this->footnotes[$note_id] = $this->outdent($matches[2]);
3076		return ''; # String that will replace the block
3077	}
3078
3079
3080	function doFootnotes($text) {
3081	#
3082	# Replace footnote references in $text [^id] with a special text-token
3083	# which will be replaced by the actual footnote marker in appendFootnotes.
3084	#
3085		if (!$this->in_anchor) {
3086			$text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
3087		}
3088		return $text;
3089	}
3090
3091
3092	function appendFootnotes($text) {
3093	#
3094	# Append footnote list to text.
3095	#
3096		$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
3097			array(&$this, '_appendFootnotes_callback'), $text);
3098
3099		if (!empty($this->footnotes_ordered)) {
3100			$text .= "\n\n";
3101			$text .= "<div class=\"footnotes\">\n";
3102			$text .= "<hr". $this->empty_element_suffix ."\n";
3103			$text .= "<ol>\n\n";
3104
3105			$attr = " rev=\"footnote\"";
3106			if ($this->fn_backlink_class != "") {
3107				$class = $this->fn_backlink_class;
3108				$class = $this->encodeAttribute($class);
3109				$attr .= " class=\"$class\"";
3110			}
3111			if ($this->fn_backlink_title != "") {
3112				$title = $this->fn_backlink_title;
3113				$title = $this->encodeAttribute($title);
3114				$attr .= " title=\"$title\"";
3115			}
3116			$num = 0;
3117
3118			while (!empty($this->footnotes_ordered)) {
3119				$footnote = reset($this->footnotes_ordered);
3120				$note_id = key($this->footnotes_ordered);
3121				unset($this->footnotes_ordered[$note_id]);
3122				$ref_count = $this->footnotes_ref_count[$note_id];
3123				unset($this->footnotes_ref_count[$note_id]);
3124				unset($this->footnotes[$note_id]);
3125
3126				$footnote .= "\n"; # Need to append newline before parsing.
3127				$footnote = $this->runBlockGamut("$footnote\n");
3128				$footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
3129					array(&$this, '_appendFootnotes_callback'), $footnote);
3130
3131				$attr = str_replace("%%", ++$num, $attr);
3132				$note_id = $this->encodeAttribute($note_id);
3133
3134				# Prepare backlink, multiple backlinks if multiple references
3135				$backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
3136				for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
3137					$backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>&#8617;</a>";
3138				}
3139				# Add backlink to last paragraph; create new paragraph if needed.
3140				if (preg_match('{</p>$}', $footnote)) {
3141					$footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
3142				} else {
3143					$footnote .= "\n\n<p>$backlink</p>";
3144				}
3145
3146				$text .= "<li id=\"fn:$note_id\">\n";
3147				$text .= $footnote . "\n";
3148				$text .= "</li>\n\n";
3149			}
3150
3151			$text .= "</ol>\n";
3152			$text .= "</div>";
3153		}
3154		return $text;
3155	}
3156	function _appendFootnotes_callback($matches) {
3157		$node_id = $this->fn_id_prefix . $matches[1];
3158
3159		# Create footnote marker only if it has a corresponding footnote *and*
3160		# the footnote hasn't been used by another marker.
3161		if (isset($this->footnotes[$node_id])) {
3162			$num =& $this->footnotes_numbers[$node_id];
3163			if (!isset($num)) {
3164				# Transfer footnote content to the ordered list and give it its
3165				# number
3166				$this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
3167				$this->footnotes_ref_count[$node_id] = 1;
3168				$num = $this->footnote_counter++;
3169				$ref_count_mark = '';
3170			} else {
3171				$ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
3172			}
3173
3174			$attr = " rel=\"footnote\"";
3175			if ($this->fn_link_class != "") {
3176				$class = $this->fn_link_class;
3177				$class = $this->encodeAttribute($class);
3178				$attr .= " class=\"$class\"";
3179			}
3180			if ($this->fn_link_title != "") {
3181				$title = $this->fn_link_title;
3182				$title = $this->encodeAttribute($title);
3183				$attr .= " title=\"$title\"";
3184			}
3185
3186			$attr = str_replace("%%", $num, $attr);
3187			$node_id = $this->encodeAttribute($node_id);
3188
3189			return
3190				"<sup id=\"fnref$ref_count_mark:$node_id\">".
3191				"<a href=\"#fn:$node_id\"$attr>$num</a>".
3192				"</sup>";
3193		}
3194
3195		return "[^".$matches[1]."]";
3196	}
3197
3198
3199	### Abbreviations ###
3200
3201	function stripAbbreviations($text) {
3202	#
3203	# Strips abbreviations from text, stores titles in hash references.
3204	#
3205		$less_than_tab = $this->tab_width - 1;
3206
3207		# Link defs are in the form: [id]*: url "optional title"
3208		$text = preg_replace_callback('{
3209			^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:	# abbr_id = $1
3210			(.*)					# text = $2 (no blank lines allowed)
3211			}xm',
3212			array(&$this, '_stripAbbreviations_callback'),
3213			$text);
3214		return $text;
3215	}
3216	function _stripAbbreviations_callback($matches) {
3217		$abbr_word = $matches[1];
3218		$abbr_desc = $matches[2];
3219		if ($this->abbr_word_re)
3220			$this->abbr_word_re .= '|';
3221		$this->abbr_word_re .= preg_quote($abbr_word);
3222		$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
3223		return ''; # String that will replace the block
3224	}
3225
3226
3227	function doAbbreviations($text) {
3228	#
3229	# Find defined abbreviations in text and wrap them in <abbr> elements.
3230	#
3231		if ($this->abbr_word_re) {
3232			// cannot use the /x modifier because abbr_word_re may
3233			// contain significant spaces:
3234			$text = preg_replace_callback('{'.
3235				'(?<![\w\x1A])'.
3236				'(?:'.$this->abbr_word_re.')'.
3237				'(?![\w\x1A])'.
3238				'}',
3239				array(&$this, '_doAbbreviations_callback'), $text);
3240		}
3241		return $text;
3242	}
3243	function _doAbbreviations_callback($matches) {
3244		$abbr = $matches[0];
3245		if (isset($this->abbr_desciptions[$abbr])) {
3246			$desc = $this->abbr_desciptions[$abbr];
3247			if (empty($desc)) {
3248				return $this->hashPart("<abbr>$abbr</abbr>");
3249			} else {
3250				$desc = $this->encodeAttribute($desc);
3251				return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
3252			}
3253		} else {
3254			return $matches[0];
3255		}
3256	}
3257
3258}
3259
3260
3261/*
3262
3263PHP Markdown Extra
3264==================
3265
3266Description
3267-----------
3268
3269This is a PHP port of the original Markdown formatter written in Perl
3270by John Gruber. This special "Extra" version of PHP Markdown features
3271further enhancements to the syntax for making additional constructs
such as tables and definition lists.
3273
3274Markdown is a text-to-HTML filter; it translates an easy-to-read /
3275easy-to-write structured text format into HTML. Markdown's text format
3276is mostly similar to that of plain text email, and supports features such
3277as headers, *emphasis*, code blocks, blockquotes, and links.
3278
3279Markdown's syntax is designed not as a generic markup language, but
3280specifically to serve as a front-end to (X)HTML. You can use span-level
HTML tags anywhere in a Markdown document, and you can use block-level
HTML tags (like <div> and <table>) as well.
3283
3284For more information about Markdown's syntax, see:
3285
3286<http://daringfireball.net/projects/markdown/>
3287
3288
3289Bugs
3290----
3291
3292To file bug reports please send email to:
3293
3294<michel.fortin@michelf.ca>
3295
3296Please include with your report: (1) the example input; (2) the output you
3297expected; (3) the output Markdown actually produced.
3298
3299
3300Version History
3301---------------
3302
3303See the readme file for detailed release notes for this version.
3304
3305
3306Copyright and License
3307---------------------
3308
3309PHP Markdown & Extra
3310Copyright (c) 2004-2013 Michel Fortin
3311<http://michelf.ca/>
3312All rights reserved.
3313
3314Based on Markdown
3315Copyright (c) 2003-2006 John Gruber
3316<http://daringfireball.net/>
3317All rights reserved.
3318
3319Redistribution and use in source and binary forms, with or without
3320modification, are permitted provided that the following conditions are
3321met:
3322
3323*	Redistributions of source code must retain the above copyright notice,
3324	this list of conditions and the following disclaimer.
3325
3326*	Redistributions in binary form must reproduce the above copyright
3327	notice, this list of conditions and the following disclaimer in the
3328	documentation and/or other materials provided with the distribution.
3329
3330*	Neither the name "Markdown" nor the names of its contributors may
3331	be used to endorse or promote products derived from this software
3332	without specific prior written permission.
3333
3334This software is provided by the copyright holders and contributors "as
3335is" and any express or implied warranties, including, but not limited
3336to, the implied warranties of merchantability and fitness for a
3337particular purpose are disclaimed. In no event shall the copyright owner
3338or contributors be liable for any direct, indirect, incidental, special,
3339exemplary, or consequential damages (including, but not limited to,
3340procurement of substitute goods or services; loss of use, data, or
3341profits; or business interruption) however caused and on any theory of
3342liability, whether in contract, strict liability, or tort (including
3343negligence or otherwise) arising in any way out of the use of this
3344software, even if advised of the possibility of such damage.
3345
3346*/
3347?>
3348