<?php

namespace cebe\markdown;

use cebe\markdown\block\TableTrait;

// work around https://github.com/facebook/hhvm/issues/1120
defined('ENT_HTML401') || define('ENT_HTML401', 0);

/**
 * Markdown parser for the [markdown extra](http://michelf.ca/projects/php-markdown/extra/) flavor.
 *
 * On top of the base `Markdown` parser this class adds tables, fenced code
 * blocks and "special attributes" (`{#id .class}`) on headlines, fenced code
 * blocks, links, images and link reference definitions.
 *
 * @author Carsten Brandt <mail@cebe.cc>
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
 * @link https://github.com/cebe/markdown#readme
 */
class MarkdownExtra extends Markdown
{
    // include block element parsing using traits
    use block\TableTrait;
    use block\FencedCodeTrait;

    // include inline element parsing using traits
    // TODO

    /**
     * @var bool whether special attributes on code blocks should be applied on the `<pre>` element.
     * The default behavior is to put them on the `<code>` element.
     */
    public $codeAttributesOnPre = false;

    /**
     * @inheritDoc
     */
    protected $escapeCharacters = [
        // from Markdown
        '\\', // backslash
        '`', // backtick
        '*', // asterisk
        '_', // underscore
        '{', '}', // curly braces
        '[', ']', // square brackets
        '(', ')', // parentheses
        '#', // hash mark
        '+', // plus sign
        '-', // minus sign (hyphen)
        '.', // dot
        '!', // exclamation mark
        '<', '>',
        // added by MarkdownExtra
        ':', // colon
        '|', // pipe
    ];

    /**
     * @var string delimiter-less PCRE fragment matching a special attribute group such as
     * `{#id .class1 .class2}`. Capture group 1 holds the content between the braces.
     * The character class spells out `A-Za-z` explicitly: the range `A-z` would also
     * match the ASCII characters between `Z` and `a` (`[`, `\`, `]`, `^`, `_` and the backtick).
     */
    private $_specialAttributesRegex = '\{(([#\.][A-Za-z0-9_-]+\s*)+)\}';

    // TODO allow HTML indented 3 spaces

    // TODO add markdown inside HTML blocks

    // TODO implement definition lists

    // TODO implement footnotes

    // TODO implement Abbreviations


    // block parsing

    /**
     * Checks whether a line is a link reference definition, optionally followed by a
     * title and a special attribute group, e.g. `[label]: http://example.com "Title" {#id .class}`.
     *
     * The title may be wrapped in quotes or parentheses, matching what
     * `consumeReference()` accepts.
     *
     * @param string $line the line to check.
     * @return bool|int a truthy value when the line is a reference definition.
     */
    protected function identifyReference($line)
    {
        // cheap first-character test before running the regular expression;
        // isset() guards against an empty line
        return isset($line[0])
            && ($line[0] === ' ' || $line[0] === '[')
            && preg_match('/^ {0,3}\[[^\[](.*?)\]:\s*([^\s]+?)(?:\s+[\(\'"](.+?)[\)\'"])?\s*(' . $this->_specialAttributesRegex . ')?\s*$/', $line);
    }

    /**
     * Consume link references
     *
     * Reads consecutive reference definitions starting at `$current` and stores each one
     * in `$this->references` under its lower-cased label, with the keys `url` and,
     * when present, `title` and `attributes`.
     *
     * @param string[] $lines all lines of the document being parsed.
     * @param int $current index of the first reference line.
     * @return array `[false, $lastConsumedLine]`; no block element is produced.
     */
    protected function consumeReference($lines, $current)
    {
        // groups: 1 = label, 2 = url, 3 = title, 4 = `{...}`, 5 = content between the braces
        $pattern = '/^ {0,3}\[(.+?)\]:\s*(.+?)(?:\s+[\(\'"](.+?)[\)\'"])?\s*(' . $this->_specialAttributesRegex . ')?\s*$/';
        while (isset($lines[$current]) && preg_match($pattern, $lines[$current], $matches)) {
            $label = strtolower($matches[1]);

            $this->references[$label] = [
                'url' => $this->replaceEscape($matches[2]),
            ];
            // group 3 is present but empty when attributes matched without a title,
            // so it has to be tested for content and not only for existence
            if (isset($matches[3]) && $matches[3] !== '') {
                $this->references[$label]['title'] = $matches[3];
            } elseif (
                isset($lines[$current + 1])
                && preg_match('/^\s+[\(\'"](.+?)[\)\'"]\s*$/', $lines[$current + 1], $titleMatches)
            ) {
                // title may be on the next line; a separate match array keeps the
                // attribute capture of the reference line intact
                $this->references[$label]['title'] = $titleMatches[1];
                $current++;
            }
            if (isset($matches[5])) {
                $this->references[$label]['attributes'] = $matches[5];
            }
            $current++;
        }
        // the loop stops one line past the last reference, so step back
        return [false, --$current];
    }

    /**
     * Consume lines for a fenced code block
     *
     * @param string[] $lines all lines of the document being parsed.
     * @param int $current index of the opening fence line.
     * @return array `[$block, $lastConsumedLine]` where `$block` has the keys
     * `0` (`'code'`), `attributes` and `content`.
     */
    protected function consumeFencedCode($lines, $current)
    {
        // consume until ```
        $block = [
            'code',
        ];
        $line = trim($lines[$current]);
        // position of the last fence character; `~` is only tried when there is no backtick
        if (($pos = strrpos($line, '`')) === false) {
            $pos = strrpos($line, '~');
        }
        $fence = substr($line, 0, $pos + 1);
        // NOTE: the attribute string deliberately starts AT the last fence character.
        // renderAttributes() drops the first character of every token, so for "```php"
        // the token "`php" yields the class "php", exactly like ".php" would.
        $block['attributes'] = substr($line, $pos);
        $content = [];
        for ($i = $current + 1, $count = count($lines); $i < $count; $i++) {
            // the block ends at the first line containing the fence within its first four characters
            if (($pos = strpos($line = $lines[$i], $fence)) === false || $pos > 3) {
                $content[] = $line;
            } else {
                break;
            }
        }
        $block['content'] = implode("\n", $content);
        return [$block, $i];
    }

    /**
     * Renders a code block, placing its attributes on `<pre>` or `<code>` depending
     * on `$codeAttributesOnPre`.
     *
     * @param array $block the code block with `content` and optional `attributes`.
     * @return string the rendered HTML.
     */
    protected function renderCode($block)
    {
        $attributes = $this->renderAttributes($block);
        return ($this->codeAttributesOnPre ? "<pre$attributes><code>" : "<pre><code$attributes>")
            . htmlspecialchars($block['content'] . "\n", ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8')
            . "</code></pre>\n";
    }

    /**
     * Renders a headline
     *
     * Any `specialAttributes` element in the headline content is removed from the
     * content and applied to the heading tag; when there are several, the last one wins.
     *
     * @param array $block the headline block with `level` and `content`.
     * @return string the rendered HTML.
     */
    protected function renderHeadline($block)
    {
        foreach ($block['content'] as $i => $element) {
            if ($element[0] === 'specialAttributes') {
                unset($block['content'][$i]);
                $block['attributes'] = $element[1];
            }
        }
        $tag = 'h' . $block['level'];
        $attributes = $this->renderAttributes($block);
        // strip trailing closing hashes and whitespace left at the end of the headline text
        return "<$tag$attributes>" . rtrim($this->renderAbsy($block['content']), "# \t") . "</$tag>\n";
    }

    /**
     * Turns the `attributes` string of an element into HTML `id` and `class` attributes.
     *
     * The string is split on whitespace. A token starting with `#` sets the id (the
     * last one wins); every other token contributes a class. The first character of
     * each token is dropped in both cases.
     *
     * @param array $block the element; only its optional `attributes` key is read.
     * @return string the attributes with a leading space each, or an empty string.
     */
    protected function renderAttributes($block)
    {
        $html = [];
        if (isset($block['attributes'])) {
            $attributes = preg_split('/\s+/', $block['attributes'], -1, PREG_SPLIT_NO_EMPTY);
            foreach ($attributes as $attribute) {
                // substr() may return false for a one-character token on older PHP versions
                $name = (string) substr($attribute, 1);
                if ($attribute[0] === '#') {
                    $html['id'] = $name;
                } else {
                    $html['class'][] = $name;
                }
            }
        }
        $result = '';
        foreach ($html as $attr => $value) {
            if (is_array($value)) {
                $value = trim(implode(' ', $value));
            }
            // explicit comparison instead of empty(), which would also drop the value "0"
            if ($value !== '') {
                // fenced code blocks pass the raw text after the fence, so the value must be escaped
                $result .= " $attr=\"" . htmlspecialchars($value, ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"';
            }
        }
        return $result;
    }


    // inline parsing


    /**
     * Parses a special attribute group located at the very start of the given text.
     *
     * The expression is anchored so that a group further down the text cannot match;
     * otherwise the returned length would not describe the text starting at the
     * current opening brace.
     *
     * @marker {
     */
    protected function parseSpecialAttributes($text)
    {
        if (preg_match("~^$this->_specialAttributesRegex~", $text, $matches)) {
            return [['specialAttributes', $matches[1]], strlen($matches[0])];
        }
        // not an attribute group: emit the brace as plain text and continue after it
        return [['text', '{'], 1];
    }

    /**
     * Renders a special attribute group that was not attached to another element
     * by restoring its original `{...}` notation.
     *
     * @param array $block the element; index 1 holds the content between the braces.
     * @return string
     */
    protected function renderSpecialAttributes($block)
    {
        return '{' . $block[1] . '}';
    }

    /**
     * Parses inline elements and attaches special attributes to the link or image
     * that immediately precedes them.
     *
     * @param string $text the text to parse.
     * @return array the parsed inline elements.
     */
    protected function parseInline($text)
    {
        $elements = parent::parseInline($text);
        // merge special attribute elements to links and images as they are not part of the final absy later
        $relatedElement = null;
        foreach ($elements as $i => $element) {
            if ($element[0] === 'link' || $element[0] === 'image') {
                $relatedElement = $i;
            } elseif ($element[0] === 'specialAttributes') {
                if ($relatedElement !== null) {
                    $elements[$relatedElement]['attributes'] = $element[1];
                    unset($elements[$i]);
                }
                $relatedElement = null;
            } else {
                // anything between a link/image and the attribute group breaks the association
                $relatedElement = null;
            }
        }
        return $elements;
    }

    /**
     * Renders a link, including its special attributes.
     *
     * For a reference-style link the reference data is merged into the element. When
     * the reference is unknown, the original text is emitted instead, with everything
     * after a leading `[` re-parsed as inline markdown.
     *
     * @param array $block the link element.
     * @return string the rendered HTML or the original markdown text.
     */
    protected function renderLink($block)
    {
        if (isset($block['refkey'])) {
            if (($ref = $this->lookupReference($block['refkey'])) !== false) {
                $block = array_merge($block, $ref);
            } else {
                if (strncmp($block['orig'], '[', 1) === 0) {
                    return '[' . $this->renderAbsy($this->parseInline(substr($block['orig'], 1)));
                }
                return $block['orig'];
            }
        }
        $attributes = $this->renderAttributes($block);
        return '<a href="' . htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8') . '"'
            . (empty($block['title']) ? '' : ' title="' . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"')
            . $attributes . '>' . $this->renderAbsy($block['text']) . '</a>';
    }

    /**
     * Renders an image, including its special attributes.
     *
     * For a reference-style image the reference data is merged into the element. When
     * the reference is unknown, the original text is emitted instead, with everything
     * after a leading `![` re-parsed as inline markdown.
     *
     * @param array $block the image element.
     * @return string the rendered HTML or the original markdown text.
     */
    protected function renderImage($block)
    {
        if (isset($block['refkey'])) {
            if (($ref = $this->lookupReference($block['refkey'])) !== false) {
                $block = array_merge($block, $ref);
            } else {
                if (strncmp($block['orig'], '![', 2) === 0) {
                    return '![' . $this->renderAbsy($this->parseInline(substr($block['orig'], 2)));
                }
                return $block['orig'];
            }
        }
        $attributes = $this->renderAttributes($block);
        return '<img src="' . htmlspecialchars($block['url'], ENT_COMPAT | ENT_HTML401, 'UTF-8') . '"'
            . ' alt="' . htmlspecialchars($block['text'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"'
            . (empty($block['title']) ? '' : ' title="' . htmlspecialchars($block['title'], ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE, 'UTF-8') . '"')
            . $attributes . ($this->html5 ? '>' : ' />');
    }
}