1 <?php
2 /**
3  * @copyright Copyright (c) 2014 Carsten Brandt
4  * @license https://github.com/cebe/markdown/blob/master/LICENSE
5  * @link https://github.com/cebe/markdown#readme
6  */
7 
8 namespace cebe\markdown\block;
9 
/**
 * Adds inline and block HTML support
 *
 * The trait contributes three groups of handlers:
 *
 * - block level: `identifyHtml()`, `consumeHtml()` and `renderHtml()` detect, collect and
 *   emit raw HTML blocks and HTML comments,
 * - inline level: `parseEntity()`, `parseInlineHtml()` and `parseGt()` handle the `&`, `<`
 *   and `>` characters inside text,
 * - `renderInlineHtml()` emits the raw markup collected by the inline handlers.
 */
trait HtmlTrait
{
	/**
	 * @var array HTML elements considered as inline elements.
	 * A line that starts with one of these tags is not treated as the start of a HTML block.
	 * @see http://www.w3.org/wiki/HTML/Elements#Text-level_semantics
	 */
	protected $inlineHtmlElements = [
		'a', 'abbr', 'acronym',
		'b', 'basefont', 'bdo', 'big', 'br', 'button', 'blink',
		'cite', 'code',
		'del', 'dfn',
		'em',
		'font',
		'i', 'img', 'ins', 'input', 'iframe',
		'kbd',
		'label', 'listing',
		'map', 'mark',
		'nobr',
		'object',
		'q',
		'rp', 'rt', 'ruby',
		's', 'samp', 'script', 'select', 'small', 'spacer', 'span', 'strong', 'sub', 'sup',
		'tt', 'var',
		'u',
		'wbr',
		'time',
	];
	/**
	 * @var array HTML elements known to be self-closing.
	 * A block opened by one of these tags ends on the line it starts on.
	 */
	protected $selfClosingHtmlElements = [
		'br', 'hr', 'img', 'input', 'nobr',
	];

	/**
	 * identify a line as the beginning of a HTML block.
	 *
	 * A line qualifies when it starts with `<!--`, or when it starts with `<` directly followed
	 * by an alphanumeric tag name that is not listed in [[inlineHtmlElements]] and the line
	 * contains a `>`.
	 *
	 * @param string $line the line to check.
	 * @param array $lines all lines of the document (not read here; `$line` is assumed to be
	 * `$lines[$current]`).
	 * @param integer $current index of `$line` in `$lines` (not read here).
	 * @return boolean whether the line starts a HTML block.
	 */
	protected function identifyHtml($line, $lines, $current)
	{
		// guard against empty input before looking at single characters
		if (!isset($line[0]) || $line[0] !== '<' || (isset($line[1]) && $line[1] === ' ')) {
			return false; // no html tag
		}

		if (strncmp($line, '<!--', 4) === 0) {
			return true; // a html comment
		}

		// The tag has to be terminated on this line. Handling this explicitly avoids feeding a
		// `false` strpos() result into min(), which evaluates to `false` and used to turn the
		// substr() length into -1.
		if (strpos($line, '>') === false) {
			return false; // no html tag
		}

		$tag = $this->extractHtmlTagName($line);
		if (!ctype_alnum($tag) || in_array(strtolower($tag), $this->inlineHtmlElements, true)) {
			return false; // no html tag or inline html tag
		}
		return true;
	}

	/**
	 * Consume lines for an HTML block
	 *
	 * A comment block runs up to and including the first line containing `-->`. A tag block
	 * runs until opening and closing occurrences of the tag found on the first line balance
	 * out. Tag names are compared case-insensitively and as whole names, so `<DIV>` is closed
	 * by `</div>` and `<pre>` is not counted as a nested `<p>`. A `/>` only lowers the nesting
	 * level when it terminates a tag of the same name, which keeps nested `<br/>` or
	 * `<img ... />` elements from ending the block early.
	 *
	 * @param array $lines all lines of the document.
	 * @param integer $current index of the line the block starts on.
	 * @return array two elements: the block (`['html', 'content' => ...]`) and the index of the
	 * last line that was looked at. When the block is never closed this index equals
	 * `count($lines)`.
	 */
	protected function consumeHtml($lines, $current)
	{
		$content = [];
		$count = count($lines);
		if (strncmp($lines[$current], '<!--', 4) === 0) { // html comment
			for ($i = $current; $i < $count; $i++) {
				$line = $lines[$i];
				$content[] = $line;
				if (strpos($line, '-->') !== false) {
					break;
				}
			}
		} else {
			// identifyHtml() compares the lower-cased name, so do the same here; otherwise
			// `<HR>` is accepted as a block but never recognised as self-closing.
			$tag = strtolower($this->extractHtmlTagName($lines[$current]));
			// start one level down so that the opening tag alone already ends the block
			$level = in_array($tag, $this->selfClosingHtmlElements, true) ? -1 : 0;

			$name = preg_quote($tag, '~');
			// the name must not continue, otherwise `<p` would also match `<pre>`
			$nameEnd = '(?![\w:-])';
			$openPattern = '~<' . $name . $nameEnd . '~i';
			$closePattern = '~</' . $name . '\s*>~i';
			// only a self-closing tag of the same name closes what $openPattern just opened
			$selfClosePattern = '~<' . $name . $nameEnd . '[^<>]*/>~i';

			for ($i = $current; $i < $count; $i++) {
				$line = $lines[$i];
				$content[] = $line;
				$level += (int) preg_match_all($openPattern, $line)
					- (int) preg_match_all($closePattern, $line)
					- (int) preg_match_all($selfClosePattern, $line);
				if ($level <= 0) {
					break;
				}
			}
		}
		$block = [
			'html',
			'content' => implode("\n", $content),
		];
		return [$block, $i];
	}

	/**
	 * Extracts the tag name from a line that starts with `<`.
	 *
	 * The name ends at the first `>` or space, whichever comes first, or at the end of the line
	 * when neither is present. Trailing slashes (`<hr/>`) are stripped. The result is returned
	 * as written; callers lower-case it when they need to compare.
	 *
	 * NOTE(review): the name intentionally avoids the identify/consume/parse/render prefixes
	 * used by the handler methods of this trait, on the assumption that the host parser
	 * dispatches on those prefixes - confirm before renaming.
	 *
	 * @param string $line a line starting with `<`.
	 * @return string the tag name, possibly empty.
	 */
	protected function extractHtmlTagName($line)
	{
		$end = strlen($line);
		foreach (['>', ' '] as $delimiter) {
			$pos = strpos($line, $delimiter);
			if ($pos !== false && $pos < $end) {
				$end = $pos;
			}
		}
		// substr() may return false on older PHP versions for out-of-range offsets
		return rtrim((string) substr($line, 1, max($end - 1, 0)), '/');
	}

	/**
	 * Renders an HTML block
	 *
	 * @param array $block the block created by [[consumeHtml()]].
	 * @return string the unmodified block content followed by a newline.
	 */
	protected function renderHtml($block)
	{
		return $block['content'] . "\n";
	}

	/**
	 * Parses an & or a html entity definition.
	 *
	 * An entity such as `&copy;`, `&#169;` or `&#x00A9;` is passed through unchanged, any other
	 * ampersand is escaped.
	 *
	 * @marker &
	 * @param string $text the text to parse, starting at the `&`.
	 * @return array the parsed element and the number of bytes consumed.
	 */
	protected function parseEntity($text)
	{
		// html entities e.g. &copy; &#169; &#x00A9;
		if (preg_match('/^&#?[\w\d]+;/', $text, $matches)) {
			return [['inlineHtml', $matches[0]], strlen($matches[0])];
		} else {
			return [['text', '&amp;'], 1];
		}
	}

	/**
	 * renders a html entity.
	 *
	 * @param array $block an `inlineHtml` element; index 1 holds the raw markup.
	 * @return string the raw markup.
	 */
	protected function renderInlineHtml($block)
	{
		return $block[1];
	}

	/**
	 * Parses inline HTML.
	 *
	 * Opening, closing and self-closing tags as well as HTML comments are passed through
	 * unchanged; a `<` that starts neither is escaped.
	 *
	 * @marker <
	 * @param string $text the text to parse, starting at the `<`.
	 * @return array the parsed element and the number of bytes consumed.
	 */
	protected function parseInlineHtml($text)
	{
		if (strpos($text, '>') !== false) {
			// the optional `/` right before `>` accepts self-closing tags written without a
			// space (`<br/>`), which were escaped as plain text before
			if (preg_match('~^</?(\w+\d?)( .*?)?/?>~s', $text, $matches)) {
				// HTML tags
				return [['inlineHtml', $matches[0]], strlen($matches[0])];
			} elseif (preg_match('~^<!--.*?-->~s', $text, $matches)) {
				// HTML comments
				return [['inlineHtml', $matches[0]], strlen($matches[0])];
			}
		}
		return [['text', '&lt;'], 1];
	}

	/**
	 * Escapes `>` characters.
	 *
	 * @marker >
	 * @param string $text the text to parse, starting at the `>`.
	 * @return array the escaped character as text element and the number of bytes consumed.
	 */
	protected function parseGt($text)
	{
		return [['text', '&gt;'], 1];
	}
}
169