<?php
/**
 * @copyright Copyright (c) 2014 Carsten Brandt
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
 * @link https://github.com/cebe/markdown#readme
 */

namespace cebe\markdown;

use ReflectionMethod;

/**
 * A generic parser for markdown-like languages.
 *
 * The parser works in two passes: the input is first turned into an abstract
 * syntax tree ("absy", nested arrays whose element `0` is the node type) by the
 * `parseBlocks()` / `parseInline()` family, and that tree is then turned into
 * markup by `renderAbsy()`, which dispatches to `render<Type>()` methods.
 *
 * Language elements are discovered by naming convention on protected methods:
 * `identify<Type>()` / `consume<Type>()` / `render<Type>()` for blocks and
 * `parse<Name>()` methods carrying a marker annotation for inline elements.
 *
 * @author Carsten Brandt <mail@cebe.cc>
 */
abstract class Parser
{
    /**
     * @var integer the maximum nesting level for language elements.
     */
    public $maximumNestingLevel = 32;

    /**
     * @var array the current context the parser is in.
     * TODO remove in favor of absy
     */
    protected $context = [];
    /**
     * @var array these are "escapeable" characters. When using one of these prefixed with a
     * backslash, the character will be outputted without the backslash and is not interpreted
     * as markdown.
     */
    protected $escapeCharacters = [
        '\\', // backslash
    ];

    /**
     * @var integer current nesting depth, shared by `parseBlocks()` and `parseInline()`.
     */
    private $_depth = 0;


    /**
     * Parses the given text considering the full language.
     *
     * This includes parsing block elements as well as inline elements.
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parse($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // prepare() has already run, so keep the prepare()/cleanup() pair balanced
            // even though there is nothing to parse.
            $this->cleanup();
            return '';
        }

        // normalize all line endings to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseBlocks(explode("\n", $text));
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * Parses a paragraph without block elements (block elements are ignored).
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parseParagraph($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // prepare() has already run, so keep the prepare()/cleanup() pair balanced
            // even though there is nothing to parse.
            $this->cleanup();
            return '';
        }

        // normalize all line endings to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseInline($text);
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * This method will be called before `parse()` and `parseParagraph()`.
     * You can override it to do some initialization work.
     */
    protected function prepare()
    {
    }

    /**
     * This method will be called after `parse()` and `parseParagraph()`.
     * You can override it to do cleanup.
     */
    protected function cleanup()
    {
    }


    // block parsing

    /**
     * @var array|null cached result of `blockTypes()`, `null` until first use.
     */
    private $_blockTypes;

    /**
     * @return array a list of block element types available.
     */
    protected function blockTypes()
    {
        if ($this->_blockTypes === null) {
            // detect block types via "identify" functions: every protected method named
            // `identify<Type>` contributes the lower-cased `<type>`
            $reflection = new \ReflectionClass($this);
            $this->_blockTypes = array_filter(array_map(function ($method) {
                $name = $method->getName();
                return strncmp($name, 'identify', 8) === 0 ? strtolower(substr($name, 8)) : false;
            }, $reflection->getMethods(ReflectionMethod::IS_PROTECTED)));

            // sorting makes the detection order independent of declaration order
            sort($this->_blockTypes);
        }
        return $this->_blockTypes;
    }

    /**
     * Given a set of lines and an index of a current line it uses the registered block types to
     * detect the type of this line.
     * @param array $lines
     * @param integer $current
     * @return string name of the block type in lower case
     */
    protected function detectLineType($lines, $current)
    {
        $line = $lines[$current];
        $blockTypes = $this->blockTypes();
        foreach ($blockTypes as $blockType) {
            if ($this->{'identify' . $blockType}($line, $lines, $current)) {
                return $blockType;
            }
        }
        // consider the line a normal paragraph if no other block type matches
        return 'paragraph';
    }

    /**
     * Parse block elements by calling `detectLineType()` to identify them
     * and call consume function afterwards.
     *
     * @param array $lines the lines to parse
     * @return array list of block nodes; a single `text` node holding the
     * unparsed input when the maximum nesting level has been reached.
     */
    protected function parseBlocks($lines)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', implode("\n", $lines)]];
        }
        $this->_depth++;

        $blocks = [];

        // convert lines to blocks
        for ($i = 0, $count = count($lines); $i < $count; $i++) {
            $line = $lines[$i];
            if ($line !== '' && rtrim($line) !== '') { // skip empty lines
                // identify a blocks beginning and parse the content;
                // $i is moved to the last line the block consumed
                list($block, $i) = $this->parseBlock($lines, $i);
                if ($block !== false) {
                    $blocks[] = $block;
                }
            }
        }

        $this->_depth--;

        return $blocks;
    }

    /**
     * Parses the block at current line by identifying the block type and parsing the content
     * @param $lines
     * @param $current
     * @return array Array of two elements, the first element contains the block,
     * the second contains the next line index to be parsed.
     */
    protected function parseBlock($lines, $current)
    {
        // identify block type for this line
        $blockType = $this->detectLineType($lines, $current);

        // call consume method for the detected block type to consume further lines
        return $this->{'consume' . $blockType}($lines, $current);
    }

    /**
     * Renders a list of nodes by calling `render<Type>()` for each of them,
     * where `<Type>` is the node's element `0`.
     *
     * While a node is rendered its type is the first element of `$context`.
     *
     * @param array $blocks the nodes to render
     * @return string the concatenated markup
     */
    protected function renderAbsy($blocks)
    {
        $output = '';
        foreach ($blocks as $block) {
            array_unshift($this->context, $block[0]);
            $output .= $this->{'render' . $block[0]}($block);
            array_shift($this->context);
        }
        return $output;
    }

    /**
     * Consume lines for a paragraph
     *
     * @param $lines
     * @param $current
     * @return array the paragraph block and the index of the last consumed line
     */
    protected function consumeParagraph($lines, $current)
    {
        // consume until newline
        $content = [];
        for ($i = $current, $count = count($lines); $i < $count; $i++) {
            if (ltrim($lines[$i]) !== '') {
                $content[] = $lines[$i];
            } else {
                break;
            }
        }
        $block = [
            'paragraph',
            'content' => $this->parseInline(implode("\n", $content)),
        ];
        // $i points one past the last consumed line here
        return [$block, --$i];
    }

    /**
     * Render a paragraph block
     *
     * @param $block
     * @return string
     */
    protected function renderParagraph($block)
    {
        return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
    }


    // inline parsing


    /**
     * @var array the set of inline markers to use in different contexts.
     * Indexed by the first character of the marker, each entry maps the full
     * marker to its parser method, longest marker first.
     */
    private $_inlineMarkers = [];

    /**
     * Returns a map of inline markers to the corresponding parser methods.
     *
     * This array defines handler methods for inline markdown markers.
     * When a marker is found in the text, the handler method is called with the text
     * starting at the position of the marker.
     *
     * Note that markers starting with whitespace may slow down the parser,
     * you may want to use [[renderText]] to deal with them.
     *
     * You may override this method to define a set of markers and parsing methods.
     * The default implementation looks for protected methods starting with `parse` that
     * also have an `@marker` annotation in PHPDoc.
     *
     * @return array a map of markers to parser methods
     */
    protected function inlineMarkers()
    {
        $markers = [];
        // detect "parse" functions
        $reflection = new \ReflectionClass($this);
        foreach ($reflection->getMethods(ReflectionMethod::IS_PROTECTED) as $method) {
            $methodName = $method->getName();
            if (strncmp($methodName, 'parse', 5) === 0) {
                // a method may declare more than one marker
                preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
                foreach ($matches[1] as $match) {
                    $markers[$match] = $methodName;
                }
            }
        }
        return $markers;
    }

    /**
     * Prepare markers that are used in the text to parse
     *
     * Add all markers that are present in markdown.
     * Check is done to avoid iterations in parseInline(), good for huge markdown files
     *
     * Markers are grouped by their first character and every group is ordered
     * longest marker first, so that a short marker can never shadow a longer one
     * starting with the same characters.
     *
     * @param string $text
     */
    protected function prepareMarkers($text)
    {
        $this->_inlineMarkers = [];
        foreach ($this->inlineMarkers() as $marker => $method) {
            // numeric-looking markers arrive as integer array keys
            $marker = (string) $marker;
            if ($marker === '' || strpos($text, $marker) === false) {
                continue;
            }
            $this->_inlineMarkers[$marker[0]][$marker] = $method;
        }

        foreach ($this->_inlineMarkers as $first => $group) {
            if (count($group) < 2) {
                continue;
            }
            // order by length, longest first; the relative order of equally long markers
            // is irrelevant because at most one of them can match at a given position
            uksort($group, function ($a, $b) {
                return strlen((string) $b) - strlen((string) $a);
            });
            $this->_inlineMarkers[$first] = $group;
        }
    }

    /**
     * Parses inline elements of the language.
     *
     * @param string $text the inline text to parse.
     * @return array list of inline nodes; a single `text` node holding the
     * unparsed input when the maximum nesting level has been reached.
     */
    protected function parseInline($text)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', $text]];
        }
        $this->_depth++;

        // the first characters of all markers that occur in the text
        $markers = implode('', array_keys($this->_inlineMarkers));

        $paragraph = [];

        // compare against '' instead of using empty(): empty('0') is true and would
        // disable parsing when '0' is the only marker character
        while ($markers !== '' && ($found = strpbrk($text, $markers)) !== false) {

            // $found is the tail of $text beginning at the first marker character
            $pos = strlen($text) - strlen($found);

            // add the text up to next marker to the paragraph
            if ($pos !== 0) {
                $paragraph[] = ['text', substr($text, 0, $pos)];
            }
            $text = $found;

            $parsed = false;
            foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
                $marker = (string) $marker;
                if (strncmp($text, $marker, strlen($marker)) === 0) {
                    // parse the marker
                    array_unshift($this->context, $method);
                    list($output, $offset) = $this->$method($text);
                    array_shift($this->context);

                    $paragraph[] = $output;
                    $text = (string) substr($text, $offset);
                    $parsed = true;
                    break;
                }
            }
            if (!$parsed) {
                // no marker matched here: emit the character as text and move on
                $paragraph[] = ['text', substr($text, 0, 1)];
                $text = (string) substr($text, 1);
            }
        }

        $paragraph[] = ['text', $text];

        $this->_depth--;

        return $paragraph;
    }

    /**
     * Parses escaped special characters.
     * @marker \
     */
    protected function parseEscape($text)
    {
        if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
            // drop the backslash and keep the escaped character
            return [['text', $text[1]], 2];
        }
        // not an escape sequence, keep the backslash itself
        return [['text', $text[0]], 1];
    }

    /**
     * This function renders plain text sections in the markdown text.
     * It can be used to work on normal text sections for example to highlight keywords or
     * do special escaping.
     */
    protected function renderText($block)
    {
        return $block[1];
    }
}