1<?php
2/**
3 * @copyright Copyright (c) 2014 Carsten Brandt
4 * @license https://github.com/cebe/markdown/blob/master/LICENSE
5 * @link https://github.com/cebe/markdown#readme
6 */
7
8namespace cebe\markdown;
9use ReflectionMethod;
10
11/**
12 * A generic parser for markdown-like languages.
13 *
14 * @author Carsten Brandt <mail@cebe.cc>
15 */
abstract class Parser
{
	/**
	 * @var integer the maximum nesting level for language elements.
	 */
	public $maximumNestingLevel = 32;

	/**
	 * @var array the current context the parser is in.
	 * `renderAbsy()` pushes the type of the element being rendered and `parseInline()`
	 * pushes the name of the inline parser method being called; the innermost entry is
	 * at index 0.
	 * TODO remove in favor of absy
	 */
	protected $context = [];
	/**
	 * @var array these are "escapeable" characters. When using one of these prefixed with a
	 * backslash, the character will be outputted without the backslash and is not interpreted
	 * as markdown.
	 */
	protected $escapeCharacters = [
		'\\', // backslash
	];

	/**
	 * @var integer the current nesting depth of `parseBlocks()` and `parseInline()` calls.
	 * It is compared against [[maximumNestingLevel]] to stop parsing deeply nested input.
	 */
	private $_depth = 0;


	/**
	 * Parses the given text considering the full language.
	 *
	 * This includes parsing block elements as well as inline elements.
	 * `prepare()` is called before and `cleanup()` after parsing, also when the
	 * given text is blank.
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parse($text)
	{
		$this->prepare();

		if (ltrim($text) === '') {
			// nothing to parse, but keep prepare() and cleanup() calls balanced
			$this->cleanup();
			return '';
		}

		// normalize all line endings to "\n"
		$text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

		$this->prepareMarkers($text);

		$absy = $this->parseBlocks(explode("\n", $text));
		$markup = $this->renderAbsy($absy);

		$this->cleanup();
		return $markup;
	}

	/**
	 * Parses a paragraph without block elements (block elements are ignored).
	 *
	 * `prepare()` is called before and `cleanup()` after parsing, also when the
	 * given text is blank.
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parseParagraph($text)
	{
		$this->prepare();

		if (ltrim($text) === '') {
			// nothing to parse, but keep prepare() and cleanup() calls balanced
			$this->cleanup();
			return '';
		}

		// normalize all line endings to "\n"
		$text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

		$this->prepareMarkers($text);

		$absy = $this->parseInline($text);
		$markup = $this->renderAbsy($absy);

		$this->cleanup();
		return $markup;
	}

	/**
	 * This method will be called before `parse()` and `parseParagraph()`.
	 * You can override it to do some initialization work.
	 */
	protected function prepare()
	{
	}

	/**
	 * This method will be called after `parse()` and `parseParagraph()`.
	 * You can override it to do cleanup.
	 */
	protected function cleanup()
	{
	}


	// block parsing

	/**
	 * @var array|null cached result of `blockTypes()`, `null` until it is first requested.
	 */
	private $_blockTypes;

	/**
	 * Returns the block types this parser knows about.
	 *
	 * Block types are detected from protected methods whose name starts with `identify`;
	 * the remainder of the method name in lower case is the name of the block type.
	 * The list is sorted and cached after the first call.
	 *
	 * @return array a list of block element types available.
	 */
	protected function blockTypes()
	{
		if ($this->_blockTypes === null) {
			// detect block types via "identify" functions
			$reflection = new \ReflectionClass($this);
			// methods that are not "identify" functions map to false and are filtered out
			$this->_blockTypes = array_filter(array_map(function($method) {
				$name = $method->getName();
				return strncmp($name, 'identify', 8) === 0 ? strtolower(substr($name, 8)) : false;
			}, $reflection->getMethods(ReflectionMethod::IS_PROTECTED)));

			sort($this->_blockTypes);
		}
		return $this->_blockTypes;
	}

	/**
	 * Given a set of lines and an index of a current line it uses the registered block types to
	 * detect the type of this line.
	 *
	 * The `identify` method of each block type is called in the order given by `blockTypes()`;
	 * the first one returning a truthy value determines the type.
	 *
	 * @param array $lines
	 * @param integer $current
	 * @return string name of the block type in lower case
	 */
	protected function detectLineType($lines, $current)
	{
		$line = $lines[$current];
		$blockTypes = $this->blockTypes();
		foreach($blockTypes as $blockType) {
			if ($this->{'identify' . $blockType}($line, $lines, $current)) {
				return $blockType;
			}
		}
		// consider the line a normal paragraph if no other block type matches
		return 'paragraph';
	}

	/**
	 * Parse block elements by calling `detectLineType()` to identify them
	 * and call consume function afterwards.
	 *
	 * When the maximum nesting level is reached, the lines are not parsed and are
	 * returned as a single text element instead.
	 *
	 * @param array $lines the lines to parse
	 * @return array a list of block elements
	 */
	protected function parseBlocks($lines)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', implode("\n", $lines)]];
		}
		$this->_depth++;

		$blocks = [];

		// convert lines to blocks
		for ($i = 0, $count = count($lines); $i < $count; $i++) {
			$line = $lines[$i];
			if ($line !== '' && rtrim($line) !== '') { // skip empty lines
				// identify a blocks beginning and parse the content,
				// $i is moved to the line index returned by the consume function
				list($block, $i) = $this->parseBlock($lines, $i);
				if ($block !== false) {
					$blocks[] = $block;
				}
			}
		}

		$this->_depth--;

		return $blocks;
	}

	/**
	 * Parses the block at current line by identifying the block type and parsing the content
	 * @param $lines
	 * @param $current
	 * @return array Array of two elements, the first element contains the block,
	 * the second contains the next line index to be parsed.
	 */
	protected function parseBlock($lines, $current)
	{
		// identify block type for this line
		$blockType = $this->detectLineType($lines, $current);

		// call consume method for the detected block type to consume further lines
		return $this->{'consume' . $blockType}($lines, $current);
	}

	/**
	 * Renders a list of elements by calling the `render` method that matches each element type.
	 *
	 * The element type is taken from index 0 of each element. While an element is rendered
	 * its type is the first entry of [[context]].
	 *
	 * @param array $blocks the elements to render
	 * @return string the concatenated output of all elements
	 */
	protected function renderAbsy($blocks)
	{
		$output = '';
		foreach ($blocks as $block) {
			array_unshift($this->context, $block[0]);
			$output .= $this->{'render' . $block[0]}($block);
			array_shift($this->context);
		}
		return $output;
	}

	/**
	 * Consume lines for a paragraph
	 *
	 * Lines are consumed until the first blank line or the end of input.
	 *
	 * @param $lines
	 * @param $current
	 * @return array Array of two elements, the paragraph block and the index of
	 * the last line that has been consumed.
	 */
	protected function consumeParagraph($lines, $current)
	{
		// consume until newline
		$content = [];
		for ($i = $current, $count = count($lines); $i < $count; $i++) {
			if (ltrim($lines[$i]) !== '') {
				$content[] = $lines[$i];
			} else {
				break;
			}
		}
		$block = [
			'paragraph',
			'content' => $this->parseInline(implode("\n", $content)),
		];
		// $i points at the first line that is not part of the paragraph, step back by one
		return [$block, --$i];
	}

	/**
	 * Render a paragraph block
	 *
	 * @param $block
	 * @return string
	 */
	protected function renderParagraph($block)
	{
		return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
	}


	// inline parsing


	/**
	 * @var array the set of inline markers that are present in the text currently parsed.
	 * Markers are grouped by their first character, each group maps markers to
	 * parser method names and lists longer markers before shorter ones.
	 */
	private $_inlineMarkers = [];

	/**
	 * Returns a map of inline markers to the corresponding parser methods.
	 *
	 * This array defines handler methods for inline markdown markers.
	 * When a marker is found in the text, the handler method is called with the text
	 * starting at the position of the marker.
	 *
	 * Note that markers starting with whitespace may slow down the parser,
	 * you may want to use [[renderText]] to deal with them.
	 *
	 * You may override this method to define a set of markers and parsing methods.
	 * The default implementation looks for protected methods starting with `parse` that
	 * also have an `@marker` annotation in PHPDoc.
	 *
	 * @return array a map of markers to parser methods
	 */
	protected function inlineMarkers()
	{
		$markers = [];
		// detect "parse" functions
		$reflection = new \ReflectionClass($this);
		foreach($reflection->getMethods(ReflectionMethod::IS_PROTECTED) as $method) {
			$methodName = $method->getName();
			if (strncmp($methodName, 'parse', 5) === 0) {
				preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
				foreach($matches[1] as $match) {
					$markers[$match] = $methodName;
				}
			}
		}
		return $markers;
	}

	/**
	 * Prepare markers that are used in the text to parse
	 *
	 * Add all markers that are present in markdown.
	 * Check is done to avoid iterations in parseInline(), good for huge markdown files
	 *
	 * Markers are grouped by their first character. Inside each group the markers are
	 * ordered by length, longest first, so that a short marker never hides a longer marker
	 * starting with the same characters.
	 *
	 * @param string $text
	 */
	protected function prepareMarkers($text)
	{
		$this->_inlineMarkers = [];
		foreach ($this->inlineMarkers() as $marker => $method) {
			// PHP turns purely numeric array keys into integers, work on the string form
			$marker = (string) $marker;
			if ($marker === '' || strpos($text, $marker) === false) {
				continue;
			}
			$this->_inlineMarkers[$marker[0]][$marker] = $method;
		}

		// put the longest markers first, uksort() keeps the marker => method association
		foreach ($this->_inlineMarkers as $char => $group) {
			uksort($group, function($a, $b) {
				return strlen((string) $b) - strlen((string) $a);
			});
			$this->_inlineMarkers[$char] = $group;
		}
	}

	/**
	 * Parses inline elements of the language.
	 *
	 * The text is scanned for the first characters of the prepared inline markers. Text
	 * between markers is added as `text` elements, at a marker the corresponding parser
	 * method is called. When the maximum nesting level is reached, the text is not parsed
	 * and is returned as a single `text` element instead.
	 *
	 * @param string $text the inline text to parse.
	 * @return array a list of inline elements
	 */
	protected function parseInline($text)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', $text]];
		}
		$this->_depth++;

		$markers = implode('', array_keys($this->_inlineMarkers));

		$paragraph = [];

		// compare with '' explicitly, empty() would also consider the marker set "0" empty
		while ($markers !== '' && ($found = strpbrk($text, $markers)) !== false) {

			// $found is the rest of $text beginning at the first marker character
			$pos = strlen($text) - strlen($found);

			// add the text up to next marker to the paragraph
			if ($pos !== 0) {
				$paragraph[] = ['text', substr($text, 0, $pos)];
			}
			$text = $found;

			$parsed = false;
			foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
				// purely numeric markers are integer array keys
				$marker = (string) $marker;
				if (strncmp($text, $marker, strlen($marker)) === 0) {
					// parse the marker, the method returns the element and the consumed length
					array_unshift($this->context, $method);
					list($output, $offset) = $this->$method($text);
					array_shift($this->context);

					$paragraph[] = $output;
					$text = substr($text, $offset);
					$parsed = true;
					break;
				}
			}
			if (!$parsed) {
				// no marker begins here, treat the character as normal text
				$paragraph[] = ['text', substr($text, 0, 1)];
				$text = substr($text, 1);
			}
		}

		$paragraph[] = ['text', $text];

		$this->_depth--;

		return $paragraph;
	}

	/**
	 * Parses escaped special characters.
	 *
	 * If the backslash is followed by one of the [[escapeCharacters]], only that character
	 * is returned and two characters are consumed. Otherwise the backslash itself is
	 * returned and one character is consumed.
	 *
	 * @marker \
	 * @param string $text the text beginning with the backslash
	 * @return array Array of two elements, the text element and the number of characters consumed.
	 */
	protected function parseEscape($text)
	{
		if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
			return [['text', $text[1]], 2];
		}
		return [['text', $text[0]], 1];
	}

	/**
	 * This function renders plain text sections in the markdown text.
	 * It can be used to work on normal text sections for example to highlight keywords or
	 * do special escaping.
	 *
	 * @param array $block the text element, the text is stored at index 1
	 * @return string the text without modification
	 */
	protected function renderText($block)
	{
		return $block[1];
	}
}
390