1<?php
2
3namespace cebe\markdown;
4
5use cebe\markdown\block\TableTrait;
6
// work around https://github.com/facebook/hhvm/issues/1120
// Define ENT_HTML401 as 0 when the runtime does not provide it, so the constant
// can still be OR-ed into the htmlspecialchars() flags used by the renderers.
if (!defined('ENT_HTML401')) {
	define('ENT_HTML401', 0);
}
9
/**
 * Markdown parser for the [markdown extra](http://michelf.ca/projects/php-markdown/extra/) flavor.
 *
 * On top of the base Markdown parser this class pulls in the table and fenced
 * code traits and adds "special attributes" (`{#id .class}`) which can be
 * attached to headlines, fenced code blocks, links, images and link references.
 *
 * @author Carsten Brandt <mail@cebe.cc>
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
 * @link https://github.com/cebe/markdown#readme
 */
class MarkdownExtra extends Markdown
{
	// include block element parsing using traits
	use block\TableTrait;
	use block\FencedCodeTrait;

	// include inline element parsing using traits
	// TODO

	/**
	 * @var bool whether special attributes on code blocks should be applied on the `<pre>` element.
	 * The default behavior is to put them on the `<code>` element.
	 */
	public $codeAttributesOnPre = false;

	/**
	 * @inheritDoc
	 */
	protected $escapeCharacters = [
		// from Markdown
		'\\', // backslash
		'`', // backtick
		'*', // asterisk
		'_', // underscore
		'{', '}', // curly braces
		'[', ']', // square brackets
		'(', ')', // parentheses
		'#', // hash mark
		'+', // plus sign
		'-', // minus sign (hyphen)
		'.', // dot
		'!', // exclamation mark
		'<', '>',
		// added by MarkdownExtra
		':', // colon
		'|', // pipe
	];

	/**
	 * @var string regular expression fragment (without delimiters) matching a special
	 * attribute block such as `{#id .class1 .class2}`. Its first capture group holds the
	 * attribute list between the braces.
	 * The character class is spelled out as `A-Za-z`: the shorthand `A-z` would also
	 * accept the characters `[`, `\`, `]`, `^` and the backtick.
	 */
	private $_specialAttributesRegex = '\{(([#\.][A-Za-z0-9_-]+\s*)+)\}';

	// TODO allow HTML indented up to 3 spaces

	// TODO add markdown inside HTML blocks

	// TODO implement definition lists

	// TODO implement footnotes

	// TODO implement Abbreviations


	// block parsing

	/**
	 * Checks whether a line is a link reference definition, optionally followed by
	 * special attributes.
	 *
	 * The title may be wrapped in quotes or parentheses, matching what
	 * [[consumeReference()]] accepts. Every line accepted here is also accepted by the
	 * pattern in [[consumeReference()]], so an identified line is always consumed.
	 *
	 * @param string $line the line to check.
	 * @return bool whether the line is a reference definition.
	 */
	protected function identifyReference($line)
	{
		// guard against empty lines before looking at the first character
		if (!isset($line[0]) || ($line[0] !== ' ' && $line[0] !== '[')) {
			return false;
		}
		return preg_match('/^ {0,3}\[[^\[](.*?)\]:\s*([^\s]+?)(?:\s+[\(\'"](.+?)[\)\'"])?\s*(' . $this->_specialAttributesRegex . ')?\s*$/', $line) === 1;
	}

	/**
	 * Consume link references
	 *
	 * Reads consecutive reference definitions starting at `$current` and stores them in
	 * `$this->references`, keyed by the lower-cased label. Each entry holds the `url` and,
	 * when present, a `title` and the raw `attributes` list.
	 *
	 * @param array $lines all lines of the document.
	 * @param int $current index of the first reference line.
	 * @return array `[false, $lastConsumedLine]`; references produce no block element.
	 */
	protected function consumeReference($lines, $current)
	{
		// groups: 1 = label, 2 = url, 3 = title, 4 = attribute block incl. braces, 5 = attribute list
		$pattern = '/^ {0,3}\[(.+?)\]:\s*(.+?)(?:\s+[\(\'"](.+?)[\)\'"])?\s*(' . $this->_specialAttributesRegex . ')?\s*$/';
		while (isset($lines[$current]) && preg_match($pattern, $lines[$current], $matches)) {
			$label = strtolower($matches[1]);

			$this->references[$label] = [
				'url' => $this->replaceEscape($matches[2]),
			];
			if (isset($matches[3])) {
				$this->references[$label]['title'] = $matches[3];
			} elseif (isset($lines[$current + 1]) && preg_match('/^\s+[\(\'"](.+?)[\)\'"]\s*$/', $lines[$current + 1], $titleMatches)) {
				// title may be on the next line; a separate match variable keeps $matches
				// intact for the attribute check below
				$this->references[$label]['title'] = $titleMatches[1];
				$current++;
			}
			if (isset($matches[5])) {
				$this->references[$label]['attributes'] = $matches[5];
			}
			$current++;
		}
		return [false, --$current];
	}

	/**
	 * Consume lines for a fenced code block
	 *
	 * The fence is everything on the (trimmed) opening line up to and including the last
	 * backtick, or the last tilde if the line contains no backtick. The block ends at the
	 * first following line that contains the fence within its first four characters, or
	 * at the end of the input.
	 *
	 * @param array $lines all lines of the document.
	 * @param int $current index of the opening fence line.
	 * @return array `[$block, $lastConsumedLine]` where `$block` is a `code` element with
	 * the keys `attributes` and `content`.
	 */
	protected function consumeFencedCode($lines, $current)
	{
		// consume until ```
		$block = [
			'code',
		];
		$line = trim($lines[$current]);
		if (($pos = strrpos($line, '`')) === false) {
			$pos = strrpos($line, '~');
		}
		$fence = substr($line, 0, $pos + 1);
		// The attribute string intentionally starts AT the last fence character:
		// renderAttributes() drops the first character of every token, which removes
		// the fence character from "`php" as well as the dot from ".php".
		$block['attributes'] = substr($line, $pos);
		$content = [];
		for ($i = $current + 1, $count = count($lines); $i < $count; $i++) {
			if (($pos = strpos($line = $lines[$i], $fence)) === false || $pos > 3) {
				$content[] = $line;
			} else {
				break;
			}
		}
		$block['content'] = implode("\n", $content);
		return [$block, $i];
	}

	/**
	 * Renders a code block
	 *
	 * Special attributes are placed on `<code>`, or on `<pre>` when
	 * [[codeAttributesOnPre]] is enabled.
	 *
	 * @param array $block the code element; `content` holds the raw code.
	 * @return string the rendered HTML.
	 */
	protected function renderCode($block)
	{
		$attributes = $this->renderAttributes($block);
		return ($this->codeAttributesOnPre ? "<pre$attributes><code>" : "<pre><code$attributes>")
			. htmlspecialchars($block['content'] . "\n", ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8')
			. "</code></pre>\n";
	}

	/**
	 * Renders a headline
	 *
	 * Special attribute elements found in the headline content are removed from the
	 * content and rendered as attributes of the heading tag. If there are several,
	 * the last one wins.
	 *
	 * @param array $block the headline element with the keys `level` and `content`.
	 * @return string the rendered HTML.
	 */
	protected function renderHeadline($block)
	{
		foreach ($block['content'] as $i => $element) {
			if ($element[0] === 'specialAttributes') {
				unset($block['content'][$i]);
				$block['attributes'] = $element[1];
			}
		}
		$tag = 'h' . $block['level'];
		$attributes = $this->renderAttributes($block);
		// strip trailing hash marks and whitespace from the rendered content
		return "<$tag$attributes>" . rtrim($this->renderAbsy($block['content']), "# \t") . "</$tag>\n";
	}

	/**
	 * Renders the special attributes of an element as HTML attributes.
	 *
	 * The whitespace separated tokens in `$block['attributes']` are interpreted by their
	 * first character: `#` sets the `id` (the last one wins), anything else adds a class.
	 * The first character of each token is always dropped. Values are HTML-escaped and
	 * empty values are omitted.
	 *
	 * @param array $block the element, optionally containing an `attributes` string.
	 * @return string the attributes, each prefixed with a space, or an empty string.
	 */
	protected function renderAttributes($block)
	{
		$html = [];
		if (isset($block['attributes'])) {
			$attributes = preg_split('/\s+/', $block['attributes'], -1, PREG_SPLIT_NO_EMPTY);
			foreach ($attributes as $attribute) {
				// the cast normalizes the `false` that older PHP versions may return
				// from substr() for single character tokens
				$name = (string) substr($attribute, 1);
				if ($attribute[0] === '#') {
					$html['id'] = $name;
				} else {
					$html['class'][] = $name;
				}
			}
		}
		$result = '';
		foreach ($html as $attr => $value) {
			if (is_array($value)) {
				$value = trim(implode(' ', $value));
			}
			// compare against '' instead of using empty(), which would also drop "0"
			if ($value !== '') {
				$result .= " $attr=\"" . htmlspecialchars($value, ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"';
			}
		}
		return $result;
	}


	// inline parsing


	/**
	 * Parses a special attribute block such as `{#id .class}` at the start of `$text`.
	 *
	 * The pattern is anchored so that only a block beginning at the marker is accepted;
	 * otherwise the opening brace is returned as plain text.
	 *
	 * @marker {
	 * @param string $text the text starting at the `{` marker.
	 * @return array `[$element, $consumedLength]`.
	 */
	protected function parseSpecialAttributes($text)
	{
		if (preg_match('~^' . $this->_specialAttributesRegex . '~', $text, $matches)) {
			return [['specialAttributes', $matches[1]], strlen($matches[0])];
		}
		return [['text', '{'], 1];
	}

	/**
	 * Renders a special attributes element that was not attached to another element
	 * by restoring its original text.
	 *
	 * @param array $block the element; index 1 holds the attribute list.
	 * @return string the attribute list wrapped in curly braces.
	 */
	protected function renderSpecialAttributes($block)
	{
		return '{' . $block[1] . '}';
	}

	/**
	 * Parses inline elements and attaches special attributes to the link or image
	 * element that directly precedes them.
	 *
	 * Attached attribute elements are removed from the result (the remaining keys are
	 * not re-indexed); all other special attribute elements are left in place.
	 *
	 * @param string $text the inline markdown text.
	 * @return array the parsed inline elements.
	 */
	protected function parseInline($text)
	{
		$elements = parent::parseInline($text);
		// merge special attribute elements to links and images as they are not part of the final absy later
		$relatedElement = null;
		foreach ($elements as $i => $element) {
			if ($element[0] === 'link' || $element[0] === 'image') {
				$relatedElement = $i;
			} elseif ($element[0] === 'specialAttributes') {
				if ($relatedElement !== null) {
					$elements[$relatedElement]['attributes'] = $element[1];
					unset($elements[$i]);
				}
				$relatedElement = null;
			} else {
				$relatedElement = null;
			}
		}
		return $elements;
	}

	/**
	 * Renders a link including its special attributes.
	 *
	 * For reference style links the reference data is merged into the element. If the
	 * reference is unknown, the original markdown is returned; when it starts with `[`,
	 * the text after that bracket is parsed and rendered as inline markdown.
	 *
	 * @param array $block the link element.
	 * @return string the rendered HTML or the original text.
	 */
	protected function renderLink($block)
	{
		if (isset($block['refkey'])) {
			if (($ref = $this->lookupReference($block['refkey'])) !== false) {
				$block = array_merge($block, $ref);
			} else {
				if (strncmp($block['orig'], '[', 1) === 0) {
					return '[' . $this->renderAbsy($this->parseInline(substr($block['orig'], 1)));
				}
				return $block['orig'];
			}
		}
		$attributes = $this->renderAttributes($block);
		return '<a href="' . htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8') . '"'
			. (empty($block['title']) ? '' : ' title="' . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"')
			. $attributes . '>' . $this->renderAbsy($block['text']) . '</a>';
	}

	/**
	 * Renders an image including its special attributes.
	 *
	 * For reference style images the reference data is merged into the element. If the
	 * reference is unknown, the original markdown is returned; when it starts with `![`,
	 * the text after that prefix is parsed and rendered as inline markdown.
	 *
	 * @param array $block the image element.
	 * @return string the rendered HTML or the original text.
	 */
	protected function renderImage($block)
	{
		if (isset($block['refkey'])) {
			if (($ref = $this->lookupReference($block['refkey'])) !== false) {
				$block = array_merge($block, $ref);
			} else {
				if (strncmp($block['orig'], '![', 2) === 0) {
					return '![' . $this->renderAbsy($this->parseInline(substr($block['orig'], 2)));
				}
				return $block['orig'];
			}
		}
		$attributes = $this->renderAttributes($block);
		return '<img src="' . htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8') . '"'
			. ' alt="' . htmlspecialchars($block['text'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"'
			. (empty($block['title']) ? '' : ' title="' . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"')
			. $attributes . ($this->html5 ? '>' : ' />');
	}
}