1<?php
2/**
3 * @copyright Copyright (c) 2014 Carsten Brandt
4 * @license https://github.com/cebe/markdown/blob/master/LICENSE
5 * @link https://github.com/cebe/markdown#readme
6 */
7
8namespace cebe\markdown\inline;
9
// Work around https://github.com/facebook/hhvm/issues/1120:
// make sure the ENT_HTML401 constant exists, defining it as 0 when it is missing.
if (!defined('ENT_HTML401')) {
	define('ENT_HTML401', 0);
}
12
13/**
 * Adds links and images as well as url markers.
15 *
16 * This trait conflicts with the HtmlTrait. If both are used together,
17 * you have to define a resolution, by defining the HtmlTrait::parseInlineHtml
18 * as private so it is not used directly:
19 *
20 * ```php
21 * use block\HtmlTrait {
22 *     parseInlineHtml as private parseInlineHtml;
23 * }
24 * ```
25 *
26 * If the method exists it is called internally by this trait.
27 *
28 * Also make sure to reset references on prepare():
29 *
30 * ```php
31 * protected function prepare()
32 * {
33 *     // reset references
34 *     $this->references = [];
35 * }
36 * ```
37 */
trait LinkTrait
{
	/**
	 * @var array a list of defined references in this document, indexed by the lower-cased label.
	 * Each entry is an array with a `url` element and an optional `title` element.
	 */
	protected $references = [];

	/**
	 * Removes the backslash from escaped characters.
	 *
	 * The list of escapable characters is read from `$this->escapeCharacters`, which has to
	 * be provided by the class using this trait.
	 *
	 * @param string $text the text to unescape.
	 * @return string the text with every `\X` replaced by `X` for each escapable character `X`.
	 */
	protected function replaceEscape($text)
	{
		$strtr = [];
		foreach ($this->escapeCharacters as $char) {
			$strtr["\\$char"] = $char;
		}
		return strtr($text, $strtr);
	}

	/**
	 * Parses a link indicated by `[`.
	 *
	 * @param string $markdown the markdown text beginning at the `[` marker.
	 * @return array a pair of the parsed element and the number of bytes consumed. The element
	 * is a `link` element, or a `text` element holding the leading `[` characters if no link
	 * could be parsed at this position.
	 * @marker [
	 */
	protected function parseLink($markdown)
	{
		// Do not allow links in links. The first context entry is skipped on purpose
		// (presumably it is the entry for this very call - confirm against the inline parser).
		$insideLink = in_array('parseLink', array_slice($this->context, 1), true);

		if (!$insideLink && ($parts = $this->parseLinkOrImage($markdown)) !== false) {
			list($text, $url, $title, $offset, $key) = $parts;
			return [
				[
					'link',
					'text' => $this->parseInline($text),
					'url' => $url,
					'title' => $title,
					'refkey' => $key,
					'orig' => substr($markdown, 0, $offset),
				],
				$offset
			];
		}

		// remove all starting [ markers to avoid next one to be parsed as link
		$result = '[';
		$i = 1;
		while (isset($markdown[$i]) && $markdown[$i] === '[') {
			$result .= '[';
			$i++;
		}
		return [['text', $result], $i];
	}

	/**
	 * Parses an image indicated by `![`.
	 *
	 * @param string $markdown the markdown text beginning at the `![` marker.
	 * @return array a pair of the parsed element and the number of bytes consumed. The element
	 * is an `image` element, or a `text` element holding `!` and the following `[` characters
	 * if no image could be parsed at this position.
	 * @marker ![
	 */
	protected function parseImage($markdown)
	{
		// skip the leading "!" so the remainder looks like a link; offsets are corrected by +1 below
		if (($parts = $this->parseLinkOrImage(substr($markdown, 1))) !== false) {
			list($text, $url, $title, $offset, $key) = $parts;

			return [
				[
					'image',
					'text' => $text,
					'url' => $url,
					'title' => $title,
					'refkey' => $key,
					'orig' => substr($markdown, 0, $offset + 1),
				],
				$offset + 1
			];
		}

		// remove all starting [ markers to avoid next one to be parsed as link
		$result = '!';
		$i = 1;
		while (isset($markdown[$i]) && $markdown[$i] === '[') {
			$result .= '[';
			$i++;
		}
		return [['text', $result], $i];
	}

	/**
	 * Parses the common syntax of links and images: `[text](url "title")`, `[text][key]` or `[text]`.
	 *
	 * @param string $markdown the markdown text beginning at the opening `[`.
	 * @return array|false `false` if `$markdown` does not start with a balanced bracket expression,
	 * otherwise a list of:
	 *
	 * - the raw text between the brackets,
	 * - the unescaped URL (`null` for reference style),
	 * - the title (`null` if there is none or for reference style),
	 * - the number of bytes consumed from `$markdown`,
	 * - the lower-cased reference key (`null` for inline style).
	 */
	protected function parseLinkOrImage($markdown)
	{
		// The bracket expression has to start at the very beginning of $markdown, because the
		// offset returned below is counted from byte 0. The pattern is therefore anchored and
		// recurses into group 1 only, so the anchor is not part of the recursion.
		if (strpos($markdown, ']') === false || !preg_match('/^(\[((?>[^\]\[]+|(?1))*)\])/', $markdown, $textMatches)) { // TODO improve bracket regex
			return false;
		}

		$text = $textMatches[2];
		$offset = strlen($textMatches[0]);
		$markdown = substr($markdown, $offset);

		$pattern = <<<REGEXP
				/(?(R) # in case of recursion match parentheses
					 \(((?>[^\s()]+)|(?R))*\)
				|      # else match a link with title
					^\(\s*(((?>[^\s()]+)|(?R))*)(\s+"(.*?)")?\s*\)
				)/x
REGEXP;
		if (preg_match($pattern, $markdown, $refMatches)) {
			// inline link
			// compare against '' instead of using empty() so that a title of "0" is kept
			$hasTitle = isset($refMatches[5]) && $refMatches[5] !== '';
			return [
				$text,
				isset($refMatches[2]) ? $this->replaceEscape($refMatches[2]) : '', // url
				$hasTitle ? $refMatches[5] : null, // title
				$offset + strlen($refMatches[0]), // offset
				null, // reference key
			];
		} elseif (preg_match('/^([ \n]?\[(.*?)\])?/s', $markdown, $refMatches)) {
			// reference style link; the bracketed key is optional in the pattern, so this
			// branch also covers the implicit form `[text]` where the text itself is the key.
			// compare against '' instead of using empty() so that a key of "0" is kept
			if (isset($refMatches[2]) && $refMatches[2] !== '') {
				$key = strtolower($refMatches[2]);
			} else {
				$key = strtolower($text);
			}
			return [
				$text,
				null, // url
				null, // title
				$offset + strlen($refMatches[0]), // offset
				$key,
			];
		}
		return false;
	}

	/**
	 * Parses inline HTML.
	 *
	 * Recognizes `<user@example.com>` and `<scheme://...>` autolinks. Anything else is passed
	 * to `parseInlineHtml()` if the class provides such a method, otherwise the `<` is
	 * returned as an escaped text element.
	 *
	 * @param string $text the markdown text beginning at the `<` marker.
	 * @return array a pair of the parsed element and the number of bytes consumed.
	 * @marker <
	 */
	protected function parseLt($text)
	{
		if (strpos($text, '>') !== false) {
			if (!in_array('parseLink', $this->context, true)) { // do not allow links in links
				if (preg_match('/^<([^\s>]*?@[^\s]*?\.\w+?)>/', $text, $matches)) {
					// email address
					return [
						['email', $this->replaceEscape($matches[1])],
						strlen($matches[0])
					];
				} elseif (preg_match('/^<([a-z]{3,}:\/\/[^\s]+?)>/', $text, $matches)) {
					// URL
					return [
						['url', $this->replaceEscape($matches[1])],
						strlen($matches[0])
					];
				}
			}
			// try inline HTML if it was neither a URL nor email if HtmlTrait is included.
			if (method_exists($this, 'parseInlineHtml')) {
				return $this->parseInlineHtml($text);
			}
		}
		return [['text', '&lt;'], 1];
	}

	/**
	 * Renders an `email` element as a `mailto:` link.
	 *
	 * @param array $block the element; the address is stored at index 1.
	 * @return string the HTML anchor.
	 */
	protected function renderEmail($block)
	{
		// The address pattern accepts double quotes, so they must be escaped inside the
		// double-quoted href attribute. The visible text keeps its quotes unescaped.
		$href = htmlspecialchars($block[1], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8');
		$email = htmlspecialchars($block[1], ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
		return "<a href=\"mailto:$href\">$email</a>";
	}

	/**
	 * Renders a `url` element as a link whose text is the URL itself.
	 *
	 * @param array $block the element; the URL is stored at index 1.
	 * @return string the HTML anchor.
	 */
	protected function renderUrl($block)
	{
		$url = htmlspecialchars($block[1], ENT_COMPAT | ENT_HTML401, 'UTF-8');
		// show the percent-decoded URL as link text, but only if decoding yields valid UTF-8
		$decodedUrl = urldecode($block[1]);
		$secureUrlText = preg_match('//u', $decodedUrl) ? $decodedUrl : $block[1];
		$text = htmlspecialchars($secureUrlText, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
		return "<a href=\"$url\">$text</a>";
	}

	/**
	 * Looks up a reference definition.
	 *
	 * The key is tried as given first and then with every run of whitespace collapsed
	 * to a single space.
	 *
	 * @param string $key the lower-cased reference key.
	 * @return array|false the reference definition or `false` if the key is not defined.
	 */
	protected function lookupReference($key)
	{
		if (isset($this->references[$key])) {
			return $this->references[$key];
		}
		$normalizedKey = preg_replace('/\s+/', ' ', $key);
		if (isset($this->references[$normalizedKey])) {
			return $this->references[$normalizedKey];
		}
		return false;
	}

	/**
	 * Renders a `link` element.
	 *
	 * For reference style links the URL and title are taken from the reference definition.
	 * If the reference is not defined, the original markdown is rendered as text instead.
	 *
	 * @param array $block the element as created by `parseLink()`.
	 * @return string the rendered HTML.
	 */
	protected function renderLink($block)
	{
		if (isset($block['refkey'])) {
			if (($ref = $this->lookupReference($block['refkey'])) !== false) {
				$block = array_merge($block, $ref);
			} else {
				// undefined reference: keep the opening bracket literally and parse the rest again
				if (strncmp($block['orig'], '[', 1) === 0) {
					return '[' . $this->renderAbsy($this->parseInline(substr($block['orig'], 1)));
				}
				return $block['orig'];
			}
		}
		// compare against '' instead of using empty() so that a title of "0" is rendered
		$hasTitle = isset($block['title']) && (string) $block['title'] !== '';

		return '<a href="' . htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8') . '"'
			. ($hasTitle ? ' title="' . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"' : '')
			. '>' . $this->renderAbsy($block['text']) . '</a>';
	}

	/**
	 * Renders an `image` element.
	 *
	 * For reference style images the URL and title are taken from the reference definition.
	 * If the reference is not defined, the original markdown is rendered as text instead.
	 * The tag is closed with `>` when `$this->html5` is set and with ` />` otherwise.
	 *
	 * @param array $block the element as created by `parseImage()`.
	 * @return string the rendered HTML.
	 */
	protected function renderImage($block)
	{
		if (isset($block['refkey'])) {
			if (($ref = $this->lookupReference($block['refkey'])) !== false) {
				$block = array_merge($block, $ref);
			} else {
				// undefined reference: keep the opening marker literally and parse the rest again
				if (strncmp($block['orig'], '![', 2) === 0) {
					return '![' . $this->renderAbsy($this->parseInline(substr($block['orig'], 2)));
				}
				return $block['orig'];
			}
		}
		// compare against '' instead of using empty() so that a title of "0" is rendered
		$hasTitle = isset($block['title']) && (string) $block['title'] !== '';

		return '<img src="' . htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8') . '"'
			. ' alt="' . htmlspecialchars($block['text'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"'
			. ($hasTitle ? ' title="' . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"' : '')
			. ($this->html5 ? '>' : ' />');
	}

	// references

	/**
	 * Checks whether a line starts a link reference definition such as `[label]: url "title"`.
	 *
	 * @param string $line the line to check.
	 * @return bool whether the line is a reference definition.
	 */
	protected function identifyReference($line)
	{
		// cheap first-character check before running the regular expression
		return isset($line[0]) && ($line[0] === ' ' || $line[0] === '[') && preg_match('/^ {0,3}\[[^\[](.*?)\]:\s*([^\s]+?)(?:\s+[\'"](.+?)[\'"])?\s*$/', $line);
	}

	/**
	 * Consume link references
	 *
	 * Stores every consecutive reference definition starting at `$current` in
	 * `$this->references`, indexed by the lower-cased label.
	 *
	 * @param array $lines all lines of the document.
	 * @param int $current the index of the first line to consume.
	 * @return array a pair of `false` in place of a block and the index of the last consumed line.
	 */
	protected function consumeReference($lines, $current)
	{
		while (isset($lines[$current]) && preg_match('/^ {0,3}\[(.+?)\]:\s*(.+?)(?:\s+[\(\'"](.+?)[\)\'"])?\s*$/', $lines[$current], $matches)) {
			$label = strtolower($matches[1]);

			$this->references[$label] = [
				'url' => $this->replaceEscape($matches[2]),
			];
			if (isset($matches[3])) {
				$this->references[$label]['title'] = $matches[3];
			} elseif (isset($lines[$current + 1]) && preg_match('/^\s+[\(\'"](.+?)[\)\'"]\s*$/', $lines[$current + 1], $matches)) {
				// title may be on the next line
				$this->references[$label]['title'] = $matches[1];
				$current++;
			}
			$current++;
		}
		// step back to the last line that actually was consumed
		return [false, --$current];
	}

	/**
	 * Parses inline markdown into a list of elements. Has to be provided by the class using this trait.
	 */
	abstract protected function parseInline($text);

	/**
	 * Renders a list of elements to a string. Has to be provided by the class using this trait.
	 */
	abstract protected function renderAbsy($blocks);
}
288