1<?php 2# 3# Markdown Extra - A text-to-HTML conversion tool for web writers 4# 5# PHP Markdown & Extra 6# Copyright (c) 2004-2013 Michel Fortin 7# <http://michelf.ca/projects/php-markdown/> 8# 9# Original Markdown 10# Copyright (c) 2004-2006 John Gruber 11# <http://daringfireball.net/projects/markdown/> 12# 13 14 15define( 'MARKDOWN_VERSION', "1.0.2" ); # 29 Nov 2013 16define( 'MARKDOWNEXTRA_VERSION', "1.2.8" ); # 29 Nov 2013 17 18 19# 20# Global default settings: 21# 22 23# Change to ">" for HTML output 24@define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />"); 25 26# Define the width of a tab for code blocks. 27@define( 'MARKDOWN_TAB_WIDTH', 4 ); 28 29# Optional title attribute for footnote links and backlinks. 30@define( 'MARKDOWN_FN_LINK_TITLE', "" ); 31@define( 'MARKDOWN_FN_BACKLINK_TITLE', "" ); 32 33# Optional class attribute for footnote links and backlinks. 34@define( 'MARKDOWN_FN_LINK_CLASS', "" ); 35@define( 'MARKDOWN_FN_BACKLINK_CLASS', "" ); 36 37# Optional class prefix for fenced code block. 38@define( 'MARKDOWN_CODE_CLASS_PREFIX', "" ); 39 40# Class attribute for code blocks goes on the `code` tag; 41# setting this to true will put attributes on the `pre` tag instead. 42@define( 'MARKDOWN_CODE_ATTR_ON_PRE', false ); 43 44 45# 46# WordPress settings: 47# 48 49# Change to false to remove Markdown from posts and/or comments. 50@define( 'MARKDOWN_WP_POSTS', true ); 51@define( 'MARKDOWN_WP_COMMENTS', true ); 52 53 54 55### Standard Function Interface ### 56 57@define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' ); 58 59function Markdown($text) { 60# 61# Initialize the parser and return the result of its transform method. 62# 63 # Setup static parser variable. 64 static $parser; 65 if (!isset($parser)) { 66 $parser_class = MARKDOWN_PARSER_CLASS; 67 $parser = new $parser_class; 68 } 69 70 # Transform text using parser. 71 return $parser->transform($text); 72} 73 74 75### WordPress Plugin Interface ### 76 77/* 78Plugin Name: Markdown Extra 79Plugin Name: Markdown 80Plugin URI: http://michelf.ca/projects/php-markdown/ 81Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.ca/projects/php-markdown/">More...</a> 82Version: 1.2.8 83Author: Michel Fortin 84Author URI: http://michelf.ca/ 85*/ 86 87if (isset($wp_version)) { 88 # More details about how it works here: 89 # <http://michelf.ca/weblog/2005/wordpress-text-flow-vs-markdown/> 90 91 # Post content and excerpts 92 # - Remove WordPress paragraph generator. 93 # - Run Markdown on excerpt, then remove all tags. 94 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss. 95 if (MARKDOWN_WP_POSTS) { 96 remove_filter('the_content', 'wpautop'); 97 remove_filter('the_content_rss', 'wpautop'); 98 remove_filter('the_excerpt', 'wpautop'); 99 add_filter('the_content', 'mdwp_MarkdownPost', 6); 100 add_filter('the_content_rss', 'mdwp_MarkdownPost', 6); 101 add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6); 102 add_filter('get_the_excerpt', 'trim', 7); 103 add_filter('the_excerpt', 'mdwp_add_p'); 104 add_filter('the_excerpt_rss', 'mdwp_strip_p'); 105 106 remove_filter('content_save_pre', 'balanceTags', 50); 107 remove_filter('excerpt_save_pre', 'balanceTags', 50); 108 add_filter('the_content', 'balanceTags', 50); 109 add_filter('get_the_excerpt', 'balanceTags', 9); 110 } 111 112 # Add a footnote id prefix to posts when inside a loop. 113 function mdwp_MarkdownPost($text) { 114 static $parser; 115 if (!$parser) { 116 $parser_class = MARKDOWN_PARSER_CLASS; 117 $parser = new $parser_class; 118 } 119 if (is_single() || is_page() || is_feed()) { 120 $parser->fn_id_prefix = ""; 121 } else { 122 $parser->fn_id_prefix = get_the_ID() . "."; 123 } 124 return $parser->transform($text); 125 } 126 127 # Comments 128 # - Remove WordPress paragraph generator. 129 # - Remove WordPress auto-link generator. 130 # - Scramble important tags before passing them to the kses filter. 131 # - Run Markdown on excerpt then remove paragraph tags. 132 if (MARKDOWN_WP_COMMENTS) { 133 remove_filter('comment_text', 'wpautop', 30); 134 remove_filter('comment_text', 'make_clickable'); 135 add_filter('pre_comment_content', 'Markdown', 6); 136 add_filter('pre_comment_content', 'mdwp_hide_tags', 8); 137 add_filter('pre_comment_content', 'mdwp_show_tags', 12); 138 add_filter('get_comment_text', 'Markdown', 6); 139 add_filter('get_comment_excerpt', 'Markdown', 6); 140 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7); 141 142 global $mdwp_hidden_tags, $mdwp_placeholders; 143 $mdwp_hidden_tags = explode(' ', 144 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>'); 145 $mdwp_placeholders = explode(' ', str_rot13( 146 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '. 147 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli')); 148 } 149 150 function mdwp_add_p($text) { 151 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) { 152 $text = '<p>'.$text.'</p>'; 153 $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text); 154 } 155 return $text; 156 } 157 158 function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); } 159 160 function mdwp_hide_tags($text) { 161 global $mdwp_hidden_tags, $mdwp_placeholders; 162 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text); 163 } 164 function mdwp_show_tags($text) { 165 global $mdwp_hidden_tags, $mdwp_placeholders; 166 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text); 167 } 168} 169 170 171### bBlog Plugin Info ### 172 173function identify_modifier_markdown() { 174 return array( 175 'name' => 'markdown', 176 'type' => 'modifier', 177 'nicename' => 'PHP Markdown Extra', 178 'description' => 'A text-to-HTML conversion tool for web writers', 179 'authors' => 'Michel Fortin and John Gruber', 180 'licence' => 'GPL', 181 'version' => MARKDOWNEXTRA_VERSION, 182 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.ca/projects/php-markdown/">More...</a>', 183 ); 184} 185 186 187### Smarty Modifier Interface ### 188 189function smarty_modifier_markdown($text) { 190 return Markdown($text); 191} 192 193 194### Textile Compatibility Mode ### 195 196# Rename this file to "classTextile.php" and it can replace Textile everywhere. 197 198if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) { 199 # Try to include PHP SmartyPants. Should be in the same directory. 200 @include_once 'smartypants.php'; 201 # Fake Textile class. It calls Markdown instead. 202 class Textile { 203 function TextileThis($text, $lite='', $encode='') { 204 if ($lite == '' && $encode == '') $text = Markdown($text); 205 if (function_exists('SmartyPants')) $text = SmartyPants($text); 206 return $text; 207 } 208 # Fake restricted version: restrictions are not supported for now. 209 function TextileRestricted($text, $lite='', $noimage='') { 210 return $this->TextileThis($text, $lite); 211 } 212 # Workaround to ensure compatibility with TextPattern 4.0.3. 213 function blockLite($text) { return $text; } 214 } 215} 216 217 218 219# 220# Markdown Parser Class 221# 222 223class Markdown_Parser { 224 225 ### Configuration Variables ### 226 227 # Change to ">" for HTML output. 228 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; 229 var $tab_width = MARKDOWN_TAB_WIDTH; 230 231 # Change to `true` to disallow markup or entities. 232 var $no_markup = false; 233 var $no_entities = false; 234 235 # Predefined urls and titles for reference links and images. 236 var $predef_urls = array(); 237 var $predef_titles = array(); 238 239 240 ### Parser Implementation ### 241 242 # Regex to match balanced [brackets]. 243 # Needed to insert a maximum bracked depth while converting to PHP. 244 var $nested_brackets_depth = 6; 245 var $nested_brackets_re; 246 247 var $nested_url_parenthesis_depth = 4; 248 var $nested_url_parenthesis_re; 249 250 # Table of hash values for escaped characters: 251 var $escape_chars = '\`*_{}[]()>#+-.!'; 252 var $escape_chars_re; 253 254 255 function Markdown_Parser() { 256 # 257 # Constructor function. Initialize appropriate member variables. 258 # 259 $this->_initDetab(); 260 $this->prepareItalicsAndBold(); 261 262 $this->nested_brackets_re = 263 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 264 str_repeat('\])*', $this->nested_brackets_depth); 265 266 $this->nested_url_parenthesis_re = 267 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 268 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 269 270 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; 271 272 # Sort document, block, and span gamut in ascendent priority order. 273 asort($this->document_gamut); 274 asort($this->block_gamut); 275 asort($this->span_gamut); 276 } 277 278 279 # Internal hashes used during transformation. 280 var $urls = array(); 281 var $titles = array(); 282 var $html_hashes = array(); 283 284 # Status flag to avoid invalid nesting. 285 var $in_anchor = false; 286 287 288 function setup() { 289 # 290 # Called before the transformation process starts to setup parser 291 # states. 292 # 293 # Clear global hashes. 294 $this->urls = $this->predef_urls; 295 $this->titles = $this->predef_titles; 296 $this->html_hashes = array(); 297 298 $this->in_anchor = false; 299 } 300 301 function teardown() { 302 # 303 # Called after the transformation process to clear any variable 304 # which may be taking up memory unnecessarly. 305 # 306 $this->urls = array(); 307 $this->titles = array(); 308 $this->html_hashes = array(); 309 } 310 311 312 function transform($text) { 313 # 314 # Main function. Performs some preprocessing on the input text 315 # and pass it through the document gamut. 316 # 317 $this->setup(); 318 319 # Remove UTF-8 BOM and marker character in input, if present. 320 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); 321 322 # Standardize line endings: 323 # DOS to Unix and Mac to Unix 324 $text = preg_replace('{\r\n?}', "\n", $text); 325 326 # Make sure $text ends with a couple of newlines: 327 $text .= "\n\n"; 328 329 # Convert all tabs to spaces. 330 $text = $this->detab($text); 331 332 # Turn block-level HTML blocks into hash entries 333 $text = $this->hashHTMLBlocks($text); 334 335 # Strip any lines consisting only of spaces and tabs. 336 # This makes subsequent regexen easier to write, because we can 337 # match consecutive blank lines with /\n+/ instead of something 338 # contorted like /[ ]*\n+/ . 339 $text = preg_replace('/^[ ]+$/m', '', $text); 340 341 # Run document gamut methods. 342 foreach ($this->document_gamut as $method => $priority) { 343 $text = $this->$method($text); 344 } 345 346 $this->teardown(); 347 348 return $text . "\n"; 349 } 350 351 var $document_gamut = array( 352 # Strip link definitions, store in hashes. 353 "stripLinkDefinitions" => 20, 354 355 "runBasicBlockGamut" => 30, 356 ); 357 358 359 function stripLinkDefinitions($text) { 360 # 361 # Strips link definitions from text, stores the URLs and titles in 362 # hash references. 363 # 364 $less_than_tab = $this->tab_width - 1; 365 366 # Link defs are in the form: ^[id]: url "optional title" 367 $text = preg_replace_callback('{ 368 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 369 [ ]* 370 \n? # maybe *one* newline 371 [ ]* 372 (?: 373 <(.+?)> # url = $2 374 | 375 (\S+?) # url = $3 376 ) 377 [ ]* 378 \n? # maybe one newline 379 [ ]* 380 (?: 381 (?<=\s) # lookbehind for whitespace 382 ["(] 383 (.*?) # title = $4 384 [")] 385 [ ]* 386 )? # title is optional 387 (?:\n+|\Z) 388 }xm', 389 array(&$this, '_stripLinkDefinitions_callback'), 390 $text); 391 return $text; 392 } 393 function _stripLinkDefinitions_callback($matches) { 394 $link_id = strtolower($matches[1]); 395 $url = $matches[2] == '' ? $matches[3] : $matches[2]; 396 $this->urls[$link_id] = $url; 397 $this->titles[$link_id] =& $matches[4]; 398 return ''; # String that will replace the block 399 } 400 401 402 function hashHTMLBlocks($text) { 403 if ($this->no_markup) return $text; 404 405 $less_than_tab = $this->tab_width - 1; 406 407 # Hashify HTML blocks: 408 # We only want to do this for block-level HTML tags, such as headers, 409 # lists, and tables. That's because we still want to wrap <p>s around 410 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 411 # phrase emphasis, and spans. The list of tags we're looking for is 412 # hard-coded: 413 # 414 # * List "a" is made of tags which can be both inline or block-level. 415 # These will be treated block-level when the start tag is alone on 416 # its line, otherwise they're not matched here and will be taken as 417 # inline later. 418 # * List "b" is made of tags which are always block-level; 419 # 420 $block_tags_a_re = 'ins|del'; 421 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 422 'script|noscript|form|fieldset|iframe|math|svg|'. 423 'article|section|nav|aside|hgroup|header|footer|'. 424 'figure'; 425 426 # Regular expression for the content of a block tag. 427 $nested_tags_level = 4; 428 $attr = ' 429 (?> # optional tag attributes 430 \s # starts with whitespace 431 (?> 432 [^>"/]+ # text outside quotes 433 | 434 /+(?!>) # slash not followed by ">" 435 | 436 "[^"]*" # text inside double quotes (tolerate ">") 437 | 438 \'[^\']*\' # text inside single quotes (tolerate ">") 439 )* 440 )? 441 '; 442 $content = 443 str_repeat(' 444 (?> 445 [^<]+ # content without tag 446 | 447 <\2 # nested opening tag 448 '.$attr.' # attributes 449 (?> 450 /> 451 | 452 >', $nested_tags_level). # end of opening tag 453 '.*?'. # last level nested tag content 454 str_repeat(' 455 </\2\s*> # closing nested tag 456 ) 457 | 458 <(?!/\2\s*> # other tags with a different name 459 ) 460 )*', 461 $nested_tags_level); 462 $content2 = str_replace('\2', '\3', $content); 463 464 # First, look for nested blocks, e.g.: 465 # <div> 466 # <div> 467 # tags for inner block must be indented. 468 # </div> 469 # </div> 470 # 471 # The outermost tags must start at the left margin for this to match, and 472 # the inner nested divs must be indented. 473 # We need to do this before the next, more liberal match, because the next 474 # match will start at the first `<div>` and stop at the first `</div>`. 475 $text = preg_replace_callback('{(?> 476 (?> 477 (?<=\n\n) # Starting after a blank line 478 | # or 479 \A\n? # the beginning of the doc 480 ) 481 ( # save in $1 482 483 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 484 # in between. 485 486 [ ]{0,'.$less_than_tab.'} 487 <('.$block_tags_b_re.')# start tag = $2 488 '.$attr.'> # attributes followed by > and \n 489 '.$content.' # content, support nesting 490 </\2> # the matching end tag 491 [ ]* # trailing spaces/tabs 492 (?=\n+|\Z) # followed by a newline or end of document 493 494 | # Special version for tags of group a. 495 496 [ ]{0,'.$less_than_tab.'} 497 <('.$block_tags_a_re.')# start tag = $3 498 '.$attr.'>[ ]*\n # attributes followed by > 499 '.$content2.' # content, support nesting 500 </\3> # the matching end tag 501 [ ]* # trailing spaces/tabs 502 (?=\n+|\Z) # followed by a newline or end of document 503 504 | # Special case just for <hr />. It was easier to make a special 505 # case than to make the other regex more complicated. 506 507 [ ]{0,'.$less_than_tab.'} 508 <(hr) # start tag = $2 509 '.$attr.' # attributes 510 /?> # the matching end tag 511 [ ]* 512 (?=\n{2,}|\Z) # followed by a blank line or end of document 513 514 | # Special case for standalone HTML comments: 515 516 [ ]{0,'.$less_than_tab.'} 517 (?s: 518 <!-- .*? --> 519 ) 520 [ ]* 521 (?=\n{2,}|\Z) # followed by a blank line or end of document 522 523 | # PHP and ASP-style processor instructions (<? and <%) 524 525 [ ]{0,'.$less_than_tab.'} 526 (?s: 527 <([?%]) # $2 528 .*? 529 \2> 530 ) 531 [ ]* 532 (?=\n{2,}|\Z) # followed by a blank line or end of document 533 534 ) 535 )}Sxmi', 536 array(&$this, '_hashHTMLBlocks_callback'), 537 $text); 538 539 return $text; 540 } 541 function _hashHTMLBlocks_callback($matches) { 542 $text = $matches[1]; 543 $key = $this->hashBlock($text); 544 return "\n\n$key\n\n"; 545 } 546 547 548 function hashPart($text, $boundary = 'X') { 549 # 550 # Called whenever a tag must be hashed when a function insert an atomic 551 # element in the text stream. Passing $text to through this function gives 552 # a unique text-token which will be reverted back when calling unhash. 553 # 554 # The $boundary argument specify what character should be used to surround 555 # the token. By convension, "B" is used for block elements that needs not 556 # to be wrapped into paragraph tags at the end, ":" is used for elements 557 # that are word separators and "X" is used in the general case. 558 # 559 # Swap back any tag hash found in $text so we do not have to `unhash` 560 # multiple times at the end. 561 $text = $this->unhash($text); 562 563 # Then hash the block. 564 static $i = 0; 565 $key = "$boundary\x1A" . ++$i . $boundary; 566 $this->html_hashes[$key] = $text; 567 return $key; # String that will replace the tag. 568 } 569 570 571 function hashBlock($text) { 572 # 573 # Shortcut function for hashPart with block-level boundaries. 574 # 575 return $this->hashPart($text, 'B'); 576 } 577 578 579 var $block_gamut = array( 580 # 581 # These are all the transformations that form block-level 582 # tags like paragraphs, headers, and list items. 583 # 584 "doHeaders" => 10, 585 "doHorizontalRules" => 20, 586 587 "doLists" => 40, 588 "doCodeBlocks" => 50, 589 "doBlockQuotes" => 60, 590 ); 591 592 function runBlockGamut($text) { 593 # 594 # Run block gamut tranformations. 595 # 596 # We need to escape raw HTML in Markdown source before doing anything 597 # else. This need to be done for each block, and not only at the 598 # begining in the Markdown function since hashed blocks can be part of 599 # list items and could have been indented. Indented blocks would have 600 # been seen as a code block in a previous pass of hashHTMLBlocks. 601 $text = $this->hashHTMLBlocks($text); 602 603 return $this->runBasicBlockGamut($text); 604 } 605 606 function runBasicBlockGamut($text) { 607 # 608 # Run block gamut tranformations, without hashing HTML blocks. This is 609 # useful when HTML blocks are known to be already hashed, like in the first 610 # whole-document pass. 611 # 612 foreach ($this->block_gamut as $method => $priority) { 613 $text = $this->$method($text); 614 } 615 616 # Finally form paragraph and restore hashed blocks. 617 $text = $this->formParagraphs($text); 618 619 return $text; 620 } 621 622 623 function doHorizontalRules($text) { 624 # Do Horizontal Rules: 625 return preg_replace( 626 '{ 627 ^[ ]{0,3} # Leading space 628 ([-*_]) # $1: First marker 629 (?> # Repeated marker group 630 [ ]{0,2} # Zero, one, or two spaces. 631 \1 # Marker character 632 ){2,} # Group repeated at least twice 633 [ ]* # Tailing spaces 634 $ # End of line. 635 }mx', 636 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 637 $text); 638 } 639 640 641 var $span_gamut = array( 642 # 643 # These are all the transformations that occur *within* block-level 644 # tags like paragraphs, headers, and list items. 645 # 646 # Process character escapes, code spans, and inline HTML 647 # in one shot. 648 "parseSpan" => -30, 649 650 # Process anchor and image tags. Images must come first, 651 # because ![foo][f] looks like an anchor. 652 "doImages" => 10, 653 "doAnchors" => 20, 654 655 # Make links out of things like `<http://example.com/>` 656 # Must come after doAnchors, because you can use < and > 657 # delimiters in inline links like [this](<url>). 658 "doAutoLinks" => 30, 659 "encodeAmpsAndAngles" => 40, 660 661 "doItalicsAndBold" => 50, 662 "doHardBreaks" => 60, 663 ); 664 665 function runSpanGamut($text) { 666 # 667 # Run span gamut tranformations. 668 # 669 foreach ($this->span_gamut as $method => $priority) { 670 $text = $this->$method($text); 671 } 672 673 return $text; 674 } 675 676 677 function doHardBreaks($text) { 678 # Do hard breaks: 679 return preg_replace_callback('/ {2,}\n/', 680 array(&$this, '_doHardBreaks_callback'), $text); 681 } 682 function _doHardBreaks_callback($matches) { 683 return $this->hashPart("<br$this->empty_element_suffix\n"); 684 } 685 686 687 function doAnchors($text) { 688 # 689 # Turn Markdown link shortcuts into XHTML <a> tags. 690 # 691 if ($this->in_anchor) return $text; 692 $this->in_anchor = true; 693 694 # 695 # First, handle reference-style links: [link text] [id] 696 # 697 $text = preg_replace_callback('{ 698 ( # wrap whole match in $1 699 \[ 700 ('.$this->nested_brackets_re.') # link text = $2 701 \] 702 703 [ ]? # one optional space 704 (?:\n[ ]*)? # one optional newline followed by spaces 705 706 \[ 707 (.*?) # id = $3 708 \] 709 ) 710 }xs', 711 array(&$this, '_doAnchors_reference_callback'), $text); 712 713 # 714 # Next, inline-style links: [link text](url "optional title") 715 # 716 $text = preg_replace_callback('{ 717 ( # wrap whole match in $1 718 \[ 719 ('.$this->nested_brackets_re.') # link text = $2 720 \] 721 \( # literal paren 722 [ \n]* 723 (?: 724 <(.+?)> # href = $3 725 | 726 ('.$this->nested_url_parenthesis_re.') # href = $4 727 ) 728 [ \n]* 729 ( # $5 730 ([\'"]) # quote char = $6 731 (.*?) # Title = $7 732 \6 # matching quote 733 [ \n]* # ignore any spaces/tabs between closing quote and ) 734 )? # title is optional 735 \) 736 ) 737 }xs', 738 array(&$this, '_doAnchors_inline_callback'), $text); 739 740 # 741 # Last, handle reference-style shortcuts: [link text] 742 # These must come last in case you've also got [link text][1] 743 # or [link text](/foo) 744 # 745 $text = preg_replace_callback('{ 746 ( # wrap whole match in $1 747 \[ 748 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 749 \] 750 ) 751 }xs', 752 array(&$this, '_doAnchors_reference_callback'), $text); 753 754 $this->in_anchor = false; 755 return $text; 756 } 757 function _doAnchors_reference_callback($matches) { 758 $whole_match = $matches[1]; 759 $link_text = $matches[2]; 760 $link_id =& $matches[3]; 761 762 if ($link_id == "") { 763 # for shortcut links like [this][] or [this]. 764 $link_id = $link_text; 765 } 766 767 # lower-case and turn embedded newlines into spaces 768 $link_id = strtolower($link_id); 769 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 770 771 if (isset($this->urls[$link_id])) { 772 $url = $this->urls[$link_id]; 773 $url = $this->encodeAttribute($url); 774 775 $result = "<a href=\"$url\""; 776 if ( isset( $this->titles[$link_id] ) ) { 777 $title = $this->titles[$link_id]; 778 $title = $this->encodeAttribute($title); 779 $result .= " title=\"$title\""; 780 } 781 782 $link_text = $this->runSpanGamut($link_text); 783 $result .= ">$link_text</a>"; 784 $result = $this->hashPart($result); 785 } 786 else { 787 $result = $whole_match; 788 } 789 return $result; 790 } 791 function _doAnchors_inline_callback($matches) { 792 $whole_match = $matches[1]; 793 $link_text = $this->runSpanGamut($matches[2]); 794 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 795 $title =& $matches[7]; 796 797 $url = $this->encodeAttribute($url); 798 799 $result = "<a href=\"$url\""; 800 if (isset($title)) { 801 $title = $this->encodeAttribute($title); 802 $result .= " title=\"$title\""; 803 } 804 805 $link_text = $this->runSpanGamut($link_text); 806 $result .= ">$link_text</a>"; 807 808 return $this->hashPart($result); 809 } 810 811 812 function doImages($text) { 813 # 814 # Turn Markdown image shortcuts into <img> tags. 815 # 816 # 817 # First, handle reference-style labeled images: ![alt text][id] 818 # 819 $text = preg_replace_callback('{ 820 ( # wrap whole match in $1 821 !\[ 822 ('.$this->nested_brackets_re.') # alt text = $2 823 \] 824 825 [ ]? # one optional space 826 (?:\n[ ]*)? # one optional newline followed by spaces 827 828 \[ 829 (.*?) # id = $3 830 \] 831 832 ) 833 }xs', 834 array(&$this, '_doImages_reference_callback'), $text); 835 836 # 837 # Next, handle inline images: ![alt text](url "optional title") 838 # Don't forget: encode * and _ 839 # 840 $text = preg_replace_callback('{ 841 ( # wrap whole match in $1 842 !\[ 843 ('.$this->nested_brackets_re.') # alt text = $2 844 \] 845 \s? # One optional whitespace character 846 \( # literal paren 847 [ \n]* 848 (?: 849 <(\S*)> # src url = $3 850 | 851 ('.$this->nested_url_parenthesis_re.') # src url = $4 852 ) 853 [ \n]* 854 ( # $5 855 ([\'"]) # quote char = $6 856 (.*?) # title = $7 857 \6 # matching quote 858 [ \n]* 859 )? # title is optional 860 \) 861 ) 862 }xs', 863 array(&$this, '_doImages_inline_callback'), $text); 864 865 return $text; 866 } 867 function _doImages_reference_callback($matches) { 868 $whole_match = $matches[1]; 869 $alt_text = $matches[2]; 870 $link_id = strtolower($matches[3]); 871 872 if ($link_id == "") { 873 $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 874 } 875 876 $alt_text = $this->encodeAttribute($alt_text); 877 if (isset($this->urls[$link_id])) { 878 $url = $this->encodeAttribute($this->urls[$link_id]); 879 $result = "<img src=\"$url\" alt=\"$alt_text\""; 880 if (isset($this->titles[$link_id])) { 881 $title = $this->titles[$link_id]; 882 $title = $this->encodeAttribute($title); 883 $result .= " title=\"$title\""; 884 } 885 $result .= $this->empty_element_suffix; 886 $result = $this->hashPart($result); 887 } 888 else { 889 # If there's no such link ID, leave intact: 890 $result = $whole_match; 891 } 892 893 return $result; 894 } 895 function _doImages_inline_callback($matches) { 896 $whole_match = $matches[1]; 897 $alt_text = $matches[2]; 898 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 899 $title =& $matches[7]; 900 901 $alt_text = $this->encodeAttribute($alt_text); 902 $url = $this->encodeAttribute($url); 903 $result = "<img src=\"$url\" alt=\"$alt_text\""; 904 if (isset($title)) { 905 $title = $this->encodeAttribute($title); 906 $result .= " title=\"$title\""; # $title already quoted 907 } 908 $result .= $this->empty_element_suffix; 909 910 return $this->hashPart($result); 911 } 912 913 914 function doHeaders($text) { 915 # Setext-style headers: 916 # Header 1 917 # ======== 918 # 919 # Header 2 920 # -------- 921 # 922 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 923 array(&$this, '_doHeaders_callback_setext'), $text); 924 925 # atx-style headers: 926 # # Header 1 927 # ## Header 2 928 # ## Header 2 with closing hashes ## 929 # ... 930 # ###### Header 6 931 # 932 $text = preg_replace_callback('{ 933 ^(\#{1,6}) # $1 = string of #\'s 934 [ ]* 935 (.+?) # $2 = Header text 936 [ ]* 937 \#* # optional closing #\'s (not counted) 938 \n+ 939 }xm', 940 array(&$this, '_doHeaders_callback_atx'), $text); 941 942 return $text; 943 } 944 function _doHeaders_callback_setext($matches) { 945 # Terrible hack to check we haven't found an empty list item. 946 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) 947 return $matches[0]; 948 949 $level = $matches[2]{0} == '=' ? 1 : 2; 950 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>"; 951 return "\n" . $this->hashBlock($block) . "\n\n"; 952 } 953 function _doHeaders_callback_atx($matches) { 954 $level = strlen($matches[1]); 955 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>"; 956 return "\n" . $this->hashBlock($block) . "\n\n"; 957 } 958 959 960 function doLists($text) { 961 # 962 # Form HTML ordered (numbered) and unordered (bulleted) lists. 963 # 964 $less_than_tab = $this->tab_width - 1; 965 966 # Re-usable patterns to match list item bullets and number markers: 967 $marker_ul_re = '[*+-]'; 968 $marker_ol_re = '\d+[\.]'; 969 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 970 971 $markers_relist = array( 972 $marker_ul_re => $marker_ol_re, 973 $marker_ol_re => $marker_ul_re, 974 ); 975 976 foreach ($markers_relist as $marker_re => $other_marker_re) { 977 # Re-usable pattern to match any entirel ul or ol list: 978 $whole_list_re = ' 979 ( # $1 = whole list 980 ( # $2 981 ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces 982 ('.$marker_re.') # $4 = first list item marker 983 [ ]+ 984 ) 985 (?s:.+?) 986 ( # $5 987 \z 988 | 989 \n{2,} 990 (?=\S) 991 (?! # Negative lookahead for another list item marker 992 [ ]* 993 '.$marker_re.'[ ]+ 994 ) 995 | 996 (?= # Lookahead for another kind of list 997 \n 998 \3 # Must have the same indentation 999 '.$other_marker_re.'[ ]+ 1000 ) 1001 ) 1002 ) 1003 '; // mx 1004 1005 # We use a different prefix before nested lists than top-level lists. 1006 # See extended comment in _ProcessListItems(). 1007 1008 if ($this->list_level) { 1009 $text = preg_replace_callback('{ 1010 ^ 1011 '.$whole_list_re.' 1012 }mx', 1013 array(&$this, '_doLists_callback'), $text); 1014 } 1015 else { 1016 $text = preg_replace_callback('{ 1017 (?:(?<=\n)\n|\A\n?) # Must eat the newline 1018 '.$whole_list_re.' 1019 }mx', 1020 array(&$this, '_doLists_callback'), $text); 1021 } 1022 } 1023 1024 return $text; 1025 } 1026 function _doLists_callback($matches) { 1027 # Re-usable patterns to match list item bullets and number markers: 1028 $marker_ul_re = '[*+-]'; 1029 $marker_ol_re = '\d+[\.]'; 1030 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 1031 1032 $list = $matches[1]; 1033 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol"; 1034 1035 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); 1036 1037 $list .= "\n"; 1038 $result = $this->processListItems($list, $marker_any_re); 1039 1040 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>"); 1041 return "\n". $result ."\n\n"; 1042 } 1043 1044 var $list_level = 0; 1045 1046 function processListItems($list_str, $marker_any_re) { 1047 # 1048 # Process the contents of a single ordered or unordered list, splitting it 1049 # into individual list items. 1050 # 1051 # The $this->list_level global keeps track of when we're inside a list. 1052 # Each time we enter a list, we increment it; when we leave a list, 1053 # we decrement. If it's zero, we're not in a list anymore. 1054 # 1055 # We do this because when we're not inside a list, we want to treat 1056 # something like this: 1057 # 1058 # I recommend upgrading to version 1059 # 8. Oops, now this line is treated 1060 # as a sub-list. 1061 # 1062 # As a single paragraph, despite the fact that the second line starts 1063 # with a digit-period-space sequence. 1064 # 1065 # Whereas when we're inside a list (or sub-list), that line will be 1066 # treated as the start of a sub-list. What a kludge, huh? This is 1067 # an aspect of Markdown's syntax that's hard to parse perfectly 1068 # without resorting to mind-reading. Perhaps the solution is to 1069 # change the syntax rules such that sub-lists must start with a 1070 # starting cardinal number; e.g. "1." or "a.". 1071 1072 $this->list_level++; 1073 1074 # trim trailing blank lines: 1075 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 1076 1077 $list_str = preg_replace_callback('{ 1078 (\n)? # leading line = $1 1079 (^[ ]*) # leading whitespace = $2 1080 ('.$marker_any_re.' # list marker and space = $3 1081 (?:[ ]+|(?=\n)) # space only required if item is not empty 1082 ) 1083 ((?s:.*?)) # list item text = $4 1084 (?:(\n+(?=\n))|\n) # tailing blank line = $5 1085 (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) 1086 }xm', 1087 array(&$this, '_processListItems_callback'), $list_str); 1088 1089 $this->list_level--; 1090 return $list_str; 1091 } 1092 function _processListItems_callback($matches) { 1093 $item = $matches[4]; 1094 $leading_line =& $matches[1]; 1095 $leading_space =& $matches[2]; 1096 $marker_space = $matches[3]; 1097 $tailing_blank_line =& $matches[5]; 1098 1099 if ($leading_line || $tailing_blank_line || 1100 preg_match('/\n{2,}/', $item)) 1101 { 1102 # Replace marker with the appropriate whitespace indentation 1103 $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item; 1104 $item = $this->runBlockGamut($this->outdent($item)."\n"); 1105 } 1106 else { 1107 # Recursion for sub-lists: 1108 $item = $this->doLists($this->outdent($item)); 1109 $item = preg_replace('/\n+$/', '', $item); 1110 $item = $this->runSpanGamut($item); 1111 } 1112 1113 return "<li>" . $item . "</li>\n"; 1114 } 1115 1116 1117 function doCodeBlocks($text) { 1118 # 1119 # Process Markdown `<pre><code>` blocks. 1120 # 1121 $text = preg_replace_callback('{ 1122 (?:\n\n|\A\n?) 1123 ( # $1 = the code block -- one or more lines, starting with a space/tab 1124 (?> 1125 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces 1126 .*\n+ 1127 )+ 1128 ) 1129 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc 1130 }xm', 1131 array(&$this, '_doCodeBlocks_callback'), $text); 1132 1133 return $text; 1134 } 1135 function _doCodeBlocks_callback($matches) { 1136 $codeblock = $matches[1]; 1137 1138 $codeblock = $this->outdent($codeblock); 1139 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1140 1141 # trim leading newlines and trailing newlines 1142 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock); 1143 1144 $codeblock = "<pre><code>$codeblock\n</code></pre>"; 1145 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 1146 } 1147 1148 1149 function makeCodeSpan($code) { 1150 # 1151 # Create a code span markup for $code. Called from handleSpanToken. 1152 # 1153 $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 1154 return $this->hashPart("<code>$code</code>"); 1155 } 1156 1157 1158 var $em_relist = array( 1159 '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)', 1160 '*' => '(?<=\S|^)(?<!\*)\*(?!\*)', 1161 '_' => '(?<=\S|^)(?<!_)_(?!_)', 1162 ); 1163 var $strong_relist = array( 1164 '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)', 1165 '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)', 1166 '__' => '(?<=\S|^)(?<!_)__(?!_)', 1167 ); 1168 var $em_strong_relist = array( 1169 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)', 1170 '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)', 1171 '___' => '(?<=\S|^)(?<!_)___(?!_)', 1172 ); 1173 var $em_strong_prepared_relist; 1174 1175 function prepareItalicsAndBold() { 1176 # 1177 # Prepare regular expressions for searching emphasis tokens in any 1178 # context. 1179 # 1180 foreach ($this->em_relist as $em => $em_re) { 1181 foreach ($this->strong_relist as $strong => $strong_re) { 1182 # Construct list of allowed token expressions. 1183 $token_relist = array(); 1184 if (isset($this->em_strong_relist["$em$strong"])) { 1185 $token_relist[] = $this->em_strong_relist["$em$strong"]; 1186 } 1187 $token_relist[] = $em_re; 1188 $token_relist[] = $strong_re; 1189 1190 # Construct master expression from list. 1191 $token_re = '{('. implode('|', $token_relist) .')}'; 1192 $this->em_strong_prepared_relist["$em$strong"] = $token_re; 1193 } 1194 } 1195 } 1196 1197 function doItalicsAndBold($text) { 1198 $token_stack = array(''); 1199 $text_stack = array(''); 1200 $em = ''; 1201 $strong = ''; 1202 $tree_char_em = false; 1203 1204 while (1) { 1205 # 1206 # Get prepared regular expression for seraching emphasis tokens 1207 # in current context. 1208 # 1209 $token_re = $this->em_strong_prepared_relist["$em$strong"]; 1210 1211 # 1212 # Each loop iteration search for the next emphasis token. 1213 # Each token is then passed to handleSpanToken. 1214 # 1215 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1216 $text_stack[0] .= $parts[0]; 1217 $token =& $parts[1]; 1218 $text =& $parts[2]; 1219 1220 if (empty($token)) { 1221 # Reached end of text span: empty stack without emitting. 1222 # any more emphasis. 1223 while ($token_stack[0]) { 1224 $text_stack[1] .= array_shift($token_stack); 1225 $text_stack[0] .= array_shift($text_stack); 1226 } 1227 break; 1228 } 1229 1230 $token_len = strlen($token); 1231 if ($tree_char_em) { 1232 # Reached closing marker while inside a three-char emphasis. 1233 if ($token_len == 3) { 1234 # Three-char closing marker, close em and strong. 1235 array_shift($token_stack); 1236 $span = array_shift($text_stack); 1237 $span = $this->runSpanGamut($span); 1238 $span = "<strong><em>$span</em></strong>"; 1239 $text_stack[0] .= $this->hashPart($span); 1240 $em = ''; 1241 $strong = ''; 1242 } else { 1243 # Other closing marker: close one em or strong and 1244 # change current token state to match the other 1245 $token_stack[0] = str_repeat($token{0}, 3-$token_len); 1246 $tag = $token_len == 2 ? "strong" : "em"; 1247 $span = $text_stack[0]; 1248 $span = $this->runSpanGamut($span); 1249 $span = "<$tag>$span</$tag>"; 1250 $text_stack[0] = $this->hashPart($span); 1251 $$tag = ''; # $$tag stands for $em or $strong 1252 } 1253 $tree_char_em = false; 1254 } else if ($token_len == 3) { 1255 if ($em) { 1256 # Reached closing marker for both em and strong. 1257 # Closing strong marker: 1258 for ($i = 0; $i < 2; ++$i) { 1259 $shifted_token = array_shift($token_stack); 1260 $tag = strlen($shifted_token) == 2 ? "strong" : "em"; 1261 $span = array_shift($text_stack); 1262 $span = $this->runSpanGamut($span); 1263 $span = "<$tag>$span</$tag>"; 1264 $text_stack[0] .= $this->hashPart($span); 1265 $$tag = ''; # $$tag stands for $em or $strong 1266 } 1267 } else { 1268 # Reached opening three-char emphasis marker. Push on token 1269 # stack; will be handled by the special condition above. 1270 $em = $token{0}; 1271 $strong = "$em$em"; 1272 array_unshift($token_stack, $token); 1273 array_unshift($text_stack, ''); 1274 $tree_char_em = true; 1275 } 1276 } else if ($token_len == 2) { 1277 if ($strong) { 1278 # Unwind any dangling emphasis marker: 1279 if (strlen($token_stack[0]) == 1) { 1280 $text_stack[1] .= array_shift($token_stack); 1281 $text_stack[0] .= array_shift($text_stack); 1282 } 1283 # Closing strong marker: 1284 array_shift($token_stack); 1285 $span = array_shift($text_stack); 1286 $span = $this->runSpanGamut($span); 1287 $span = "<strong>$span</strong>"; 1288 $text_stack[0] .= $this->hashPart($span); 1289 $strong = ''; 1290 } else { 1291 array_unshift($token_stack, $token); 1292 array_unshift($text_stack, ''); 1293 $strong = $token; 1294 } 1295 } else { 1296 # Here $token_len == 1 1297 if ($em) { 1298 if (strlen($token_stack[0]) == 1) { 1299 # Closing emphasis marker: 1300 array_shift($token_stack); 1301 $span = array_shift($text_stack); 1302 $span = $this->runSpanGamut($span); 1303 $span = "<em>$span</em>"; 1304 $text_stack[0] .= $this->hashPart($span); 1305 $em = ''; 1306 } else { 1307 $text_stack[0] .= $token; 1308 } 1309 } else { 1310 array_unshift($token_stack, $token); 1311 array_unshift($text_stack, ''); 1312 $em = $token; 1313 } 1314 } 1315 } 1316 return $text_stack[0]; 1317 } 1318 1319 1320 function doBlockQuotes($text) { 1321 $text = preg_replace_callback('/ 1322 ( # Wrap whole match in $1 1323 (?> 1324 ^[ ]*>[ ]? # ">" at the start of a line 1325 .+\n # rest of the first line 1326 (.+\n)* # subsequent consecutive lines 1327 \n* # blanks 1328 )+ 1329 ) 1330 /xm', 1331 array(&$this, '_doBlockQuotes_callback'), $text); 1332 1333 return $text; 1334 } 1335 function _doBlockQuotes_callback($matches) { 1336 $bq = $matches[1]; 1337 # trim one level of quoting - trim whitespace-only lines 1338 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1339 $bq = $this->runBlockGamut($bq); # recurse 1340 1341 $bq = preg_replace('/^/m', " ", $bq); 1342 # These leading spaces cause problem with <pre> content, 1343 # so we need to fix that: 1344 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 1345 array(&$this, '_doBlockQuotes_callback2'), $bq); 1346 1347 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n"; 1348 } 1349 function _doBlockQuotes_callback2($matches) { 1350 $pre = $matches[1]; 1351 $pre = preg_replace('/^ /m', '', $pre); 1352 return $pre; 1353 } 1354 1355 1356 function formParagraphs($text) { 1357 # 1358 # Params: 1359 # $text - string to process with html <p> tags 1360 # 1361 # Strip leading and trailing lines: 1362 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1363 1364 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1365 1366 # 1367 # Wrap <p> tags and unhashify HTML blocks 1368 # 1369 foreach ($grafs as $key => $value) { 1370 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1371 # Is a paragraph. 1372 $value = $this->runSpanGamut($value); 1373 $value = preg_replace('/^([ ]*)/', "<p>", $value); 1374 $value .= "</p>"; 1375 $grafs[$key] = $this->unhash($value); 1376 } 1377 else { 1378 # Is a block. 1379 # Modify elements of @grafs in-place... 1380 $graf = $value; 1381 $block = $this->html_hashes[$graf]; 1382 $graf = $block; 1383// if (preg_match('{ 1384// \A 1385// ( # $1 = <div> tag 1386// <div \s+ 1387// [^>]* 1388// \b 1389// markdown\s*=\s* ([\'"]) # $2 = attr quote char 1390// 1 1391// \2 1392// [^>]* 1393// > 1394// ) 1395// ( # $3 = contents 1396// .* 1397// ) 1398// (</div>) # $4 = closing tag 1399// \z 1400// }xs', $block, $matches)) 1401// { 1402// list(, $div_open, , $div_content, $div_close) = $matches; 1403// 1404// # We can't call Markdown(), because that resets the hash; 1405// # that initialization code should be pulled into its own sub, though. 1406// $div_content = $this->hashHTMLBlocks($div_content); 1407// 1408// # Run document gamut methods on the content. 1409// foreach ($this->document_gamut as $method => $priority) { 1410// $div_content = $this->$method($div_content); 1411// } 1412// 1413// $div_open = preg_replace( 1414// '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); 1415// 1416// $graf = $div_open . "\n" . $div_content . "\n" . $div_close; 1417// } 1418 $grafs[$key] = $graf; 1419 } 1420 } 1421 1422 return implode("\n\n", $grafs); 1423 } 1424 1425 1426 function encodeAttribute($text) { 1427 # 1428 # Encode text for a double-quoted HTML attribute. This function 1429 # is *not* suitable for attributes enclosed in single quotes. 1430 # 1431 $text = $this->encodeAmpsAndAngles($text); 1432 $text = str_replace('"', '"', $text); 1433 return $text; 1434 } 1435 1436 1437 function encodeAmpsAndAngles($text) { 1438 # 1439 # Smart processing for ampersands and angle brackets that need to 1440 # be encoded. Valid character entities are left alone unless the 1441 # no-entities mode is set. 1442 # 1443 if ($this->no_entities) { 1444 $text = str_replace('&', '&', $text); 1445 } else { 1446 # Ampersand-encoding based entirely on Nat Irons's Amputator 1447 # MT plugin: <http://bumppo.net/projects/amputator/> 1448 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 1449 '&', $text);; 1450 } 1451 # Encode remaining <'s 1452 $text = str_replace('<', '<', $text); 1453 1454 return $text; 1455 } 1456 1457 1458 function doAutoLinks($text) { 1459 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 1460 array(&$this, '_doAutoLinks_url_callback'), $text); 1461 1462 # Email addresses: <address@domain.foo> 1463 $text = preg_replace_callback('{ 1464 < 1465 (?:mailto:)? 1466 ( 1467 (?: 1468 [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+ 1469 | 1470 ".*?" 1471 ) 1472 \@ 1473 (?: 1474 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ 1475 | 1476 \[[\d.a-fA-F:]+\] # IPv4 & IPv6 1477 ) 1478 ) 1479 > 1480 }xi', 1481 array(&$this, '_doAutoLinks_email_callback'), $text); 1482 $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',array(&$this, '_doAutoLinks_tel_callback'), $text); 1483 1484 return $text; 1485 } 1486 function _doAutoLinks_tel_callback($matches) { 1487 $url = $this->encodeAttribute($matches[1]); 1488 $tel = $this->encodeAttribute($matches[2]); 1489 $link = "<a href=\"$url\">$tel</a>"; 1490 return $this->hashPart($link); 1491 } 1492 function _doAutoLinks_url_callback($matches) { 1493 $url = $this->encodeAttribute($matches[1]); 1494 $link = "<a href=\"$url\">$url</a>"; 1495 return $this->hashPart($link); 1496 } 1497 function _doAutoLinks_email_callback($matches) { 1498 $address = $matches[1]; 1499 $link = $this->encodeEmailAddress($address); 1500 return $this->hashPart($link); 1501 } 1502 1503 1504 function encodeEmailAddress($addr) { 1505 # 1506 # Input: an email address, e.g. "foo@example.com" 1507 # 1508 # Output: the email address as a mailto link, with each character 1509 # of the address encoded as either a decimal or hex entity, in 1510 # the hopes of foiling most address harvesting spam bots. E.g.: 1511 # 1512 # <p><a href="mailto:foo 1513 # @example.co 1514 # m">foo@exampl 1515 # e.com</a></p> 1516 # 1517 # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. 1518 # With some optimizations by Milian Wolff. 1519 # 1520 $addr = "mailto:" . $addr; 1521 $chars = preg_split('/(?<!^)(?!$)/', $addr); 1522 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed. 1523 1524 foreach ($chars as $key => $char) { 1525 $ord = ord($char); 1526 # Ignore non-ascii chars. 1527 if ($ord < 128) { 1528 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. 1529 # roughly 10% raw, 45% hex, 45% dec 1530 # '@' *must* be encoded. I insist. 1531 if ($r > 90 && $char != '@') /* do nothing */; 1532 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; 1533 else $chars[$key] = '&#'.$ord.';'; 1534 } 1535 } 1536 1537 $addr = implode('', $chars); 1538 $text = implode('', array_slice($chars, 7)); # text without `mailto:` 1539 $addr = "<a href=\"$addr\">$text</a>"; 1540 1541 return $addr; 1542 } 1543 1544 1545 function parseSpan($str) { 1546 # 1547 # Take the string $str and parse it into tokens, hashing embeded HTML, 1548 # escaped characters and handling code spans. 1549 # 1550 $output = ''; 1551 1552 $span_re = '{ 1553 ( 1554 \\\\'.$this->escape_chars_re.' 1555 | 1556 (?<![`\\\\]) 1557 `+ # code span marker 1558 '.( $this->no_markup ? '' : ' 1559 | 1560 <!-- .*? --> # comment 1561 | 1562 <\?.*?\?> | <%.*?%> # processing instruction 1563 | 1564 <[!$]?[-a-zA-Z0-9:_]+ # regular tags 1565 (?> 1566 \s 1567 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1568 )? 1569 > 1570 | 1571 <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag 1572 | 1573 </[-a-zA-Z0-9:_]+\s*> # closing tag 1574 ').' 1575 ) 1576 }xs'; 1577 1578 while (1) { 1579 # 1580 # Each loop iteration seach for either the next tag, the next 1581 # openning code span marker, or the next escaped character. 1582 # Each token is then passed to handleSpanToken. 1583 # 1584 $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1585 1586 # Create token from text preceding tag. 1587 if ($parts[0] != "") { 1588 $output .= $parts[0]; 1589 } 1590 1591 # Check if we reach the end. 1592 if (isset($parts[1])) { 1593 $output .= $this->handleSpanToken($parts[1], $parts[2]); 1594 $str = $parts[2]; 1595 } 1596 else { 1597 break; 1598 } 1599 } 1600 1601 return $output; 1602 } 1603 1604 1605 function handleSpanToken($token, &$str) { 1606 # 1607 # Handle $token provided by parseSpan by determining its nature and 1608 # returning the corresponding value that should replace it. 1609 # 1610 switch ($token{0}) { 1611 case "\\": 1612 return $this->hashPart("&#". ord($token{1}). ";"); 1613 case "`": 1614 # Search for end marker in remaining text. 1615 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 1616 $str, $matches)) 1617 { 1618 $str = $matches[2]; 1619 $codespan = $this->makeCodeSpan($matches[1]); 1620 return $this->hashPart($codespan); 1621 } 1622 return $token; // return as text since no ending marker found. 1623 default: 1624 return $this->hashPart($token); 1625 } 1626 } 1627 1628 1629 function outdent($text) { 1630 # 1631 # Remove one level of line-leading tabs or spaces 1632 # 1633 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 1634 } 1635 1636 1637 # String length function for detab. `_initDetab` will create a function to 1638 # hanlde UTF-8 if the default function does not exist. 1639 var $utf8_strlen = 'mb_strlen'; 1640 1641 function detab($text) { 1642 # 1643 # Replace tabs with the appropriate amount of space. 1644 # 1645 # For each line we separate the line in blocks delemited by 1646 # tab characters. Then we reconstruct every line by adding the 1647 # appropriate number of space between each blocks. 1648 1649 $text = preg_replace_callback('/^.*\t.*$/m', 1650 array(&$this, '_detab_callback'), $text); 1651 1652 return $text; 1653 } 1654 function _detab_callback($matches) { 1655 $line = $matches[0]; 1656 $strlen = $this->utf8_strlen; # strlen function for UTF-8. 1657 1658 # Split in blocks. 1659 $blocks = explode("\t", $line); 1660 # Add each blocks to the line. 1661 $line = $blocks[0]; 1662 unset($blocks[0]); # Do not add first block twice. 1663 foreach ($blocks as $block) { 1664 # Calculate amount of space, insert spaces, insert block. 1665 $amount = $this->tab_width - 1666 $strlen($line, 'UTF-8') % $this->tab_width; 1667 $line .= str_repeat(" ", $amount) . $block; 1668 } 1669 return $line; 1670 } 1671 function _initDetab() { 1672 # 1673 # Check for the availability of the function in the `utf8_strlen` property 1674 # (initially `mb_strlen`). If the function is not available, create a 1675 # function that will loosely count the number of UTF-8 characters with a 1676 # regular expression. 1677 # 1678 if (function_exists($this->utf8_strlen)) return; 1679 $this->utf8_strlen = create_function('$text', 'return preg_match_all( 1680 "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 1681 $text, $m);'); 1682 } 1683 1684 1685 function unhash($text) { 1686 # 1687 # Swap back in all the tags hashed by _HashHTMLBlocks. 1688 # 1689 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1690 array(&$this, '_unhash_callback'), $text); 1691 } 1692 function _unhash_callback($matches) { 1693 return $this->html_hashes[$matches[0]]; 1694 } 1695 1696} 1697 1698 1699# 1700# Markdown Extra Parser Class 1701# 1702 1703class MarkdownExtra_Parser extends Markdown_Parser { 1704 1705 ### Configuration Variables ### 1706 1707 # Prefix for footnote ids. 1708 var $fn_id_prefix = ""; 1709 1710 # Optional title attribute for footnote links and backlinks. 1711 var $fn_link_title = MARKDOWN_FN_LINK_TITLE; 1712 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE; 1713 1714 # Optional class attribute for footnote links and backlinks. 1715 var $fn_link_class = MARKDOWN_FN_LINK_CLASS; 1716 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS; 1717 1718 # Optional class prefix for fenced code block. 1719 var $code_class_prefix = MARKDOWN_CODE_CLASS_PREFIX; 1720 # Class attribute for code blocks goes on the `code` tag; 1721 # setting this to true will put attributes on the `pre` tag instead. 1722 var $code_attr_on_pre = MARKDOWN_CODE_ATTR_ON_PRE; 1723 1724 # Predefined abbreviations. 1725 var $predef_abbr = array(); 1726 1727 1728 ### Parser Implementation ### 1729 1730 function MarkdownExtra_Parser() { 1731 # 1732 # Constructor function. Initialize the parser object. 1733 # 1734 # Add extra escapable characters before parent constructor 1735 # initialize the table. 1736 $this->escape_chars .= ':|'; 1737 1738 # Insert extra document, block, and span transformations. 1739 # Parent constructor will do the sorting. 1740 $this->document_gamut += array( 1741 "doFencedCodeBlocks" => 5, 1742 "stripFootnotes" => 15, 1743 "stripAbbreviations" => 25, 1744 "appendFootnotes" => 50, 1745 ); 1746 $this->block_gamut += array( 1747 "doFencedCodeBlocks" => 5, 1748 "doTables" => 15, 1749 "doDefLists" => 45, 1750 ); 1751 $this->span_gamut += array( 1752 "doFootnotes" => 5, 1753 "doAbbreviations" => 70, 1754 ); 1755 1756 parent::Markdown_Parser(); 1757 } 1758 1759 1760 # Extra variables used during extra transformations. 1761 var $footnotes = array(); 1762 var $footnotes_ordered = array(); 1763 var $footnotes_ref_count = array(); 1764 var $footnotes_numbers = array(); 1765 var $abbr_desciptions = array(); 1766 var $abbr_word_re = ''; 1767 1768 # Give the current footnote number. 1769 var $footnote_counter = 1; 1770 1771 1772 function setup() { 1773 # 1774 # Setting up Extra-specific variables. 1775 # 1776 parent::setup(); 1777 1778 $this->footnotes = array(); 1779 $this->footnotes_ordered = array(); 1780 $this->footnotes_ref_count = array(); 1781 $this->footnotes_numbers = array(); 1782 $this->abbr_desciptions = array(); 1783 $this->abbr_word_re = ''; 1784 $this->footnote_counter = 1; 1785 1786 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { 1787 if ($this->abbr_word_re) 1788 $this->abbr_word_re .= '|'; 1789 $this->abbr_word_re .= preg_quote($abbr_word); 1790 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 1791 } 1792 } 1793 1794 function teardown() { 1795 # 1796 # Clearing Extra-specific variables. 1797 # 1798 $this->footnotes = array(); 1799 $this->footnotes_ordered = array(); 1800 $this->footnotes_ref_count = array(); 1801 $this->footnotes_numbers = array(); 1802 $this->abbr_desciptions = array(); 1803 $this->abbr_word_re = ''; 1804 1805 parent::teardown(); 1806 } 1807 1808 1809 ### Extra Attribute Parser ### 1810 1811 # Expression to use to catch attributes (includes the braces) 1812 var $id_class_attr_catch_re = '\{((?:[ ]*[#.][-_:a-zA-Z0-9]+){1,})[ ]*\}'; 1813 # Expression to use when parsing in a context when no capture is desired 1814 var $id_class_attr_nocatch_re = '\{(?:[ ]*[#.][-_:a-zA-Z0-9]+){1,}[ ]*\}'; 1815 1816 function doExtraAttributes($tag_name, $attr) { 1817 # 1818 # Parse attributes caught by the $this->id_class_attr_catch_re expression 1819 # and return the HTML-formatted list of attributes. 1820 # 1821 # Currently supported attributes are .class and #id. 1822 # 1823 if (empty($attr)) return ""; 1824 1825 # Split on components 1826 preg_match_all('/[#.][-_:a-zA-Z0-9]+/', $attr, $matches); 1827 $elements = $matches[0]; 1828 1829 # handle classes and ids (only first id taken into account) 1830 $classes = array(); 1831 $id = false; 1832 foreach ($elements as $element) { 1833 if ($element{0} == '.') { 1834 $classes[] = substr($element, 1); 1835 } else if ($element{0} == '#') { 1836 if ($id === false) $id = substr($element, 1); 1837 } 1838 } 1839 1840 # compose attributes as string 1841 $attr_str = ""; 1842 if (!empty($id)) { 1843 $attr_str .= ' id="'.$id.'"'; 1844 } 1845 if (!empty($classes)) { 1846 $attr_str .= ' class="'.implode(" ", $classes).'"'; 1847 } 1848 return $attr_str; 1849 } 1850 1851 1852 function stripLinkDefinitions($text) { 1853 # 1854 # Strips link definitions from text, stores the URLs and titles in 1855 # hash references. 1856 # 1857 $less_than_tab = $this->tab_width - 1; 1858 1859 # Link defs are in the form: ^[id]: url "optional title" 1860 $text = preg_replace_callback('{ 1861 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 1862 [ ]* 1863 \n? # maybe *one* newline 1864 [ ]* 1865 (?: 1866 <(.+?)> # url = $2 1867 | 1868 (\S+?) # url = $3 1869 ) 1870 [ ]* 1871 \n? # maybe one newline 1872 [ ]* 1873 (?: 1874 (?<=\s) # lookbehind for whitespace 1875 ["(] 1876 (.*?) # title = $4 1877 [")] 1878 [ ]* 1879 )? # title is optional 1880 (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr 1881 (?:\n+|\Z) 1882 }xm', 1883 array(&$this, '_stripLinkDefinitions_callback'), 1884 $text); 1885 return $text; 1886 } 1887 function _stripLinkDefinitions_callback($matches) { 1888 $link_id = strtolower($matches[1]); 1889 $url = $matches[2] == '' ? $matches[3] : $matches[2]; 1890 $this->urls[$link_id] = $url; 1891 $this->titles[$link_id] =& $matches[4]; 1892 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]); 1893 return ''; # String that will replace the block 1894 } 1895 1896 1897 ### HTML Block Parser ### 1898 1899 # Tags that are always treated as block tags: 1900 var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption'; 1901 1902 # Tags treated as block tags only if the opening tag is alone on its line: 1903 var $context_block_tags_re = 'script|noscript|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video'; 1904 1905 # Tags where markdown="1" default to span mode: 1906 var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 1907 1908 # Tags which must not have their contents modified, no matter where 1909 # they appear: 1910 var $clean_tags_re = 'script|math|svg'; 1911 1912 # Tags that do not need to be closed. 1913 var $auto_close_tags_re = 'hr|img|param|source|track'; 1914 1915 1916 function hashHTMLBlocks($text) { 1917 # 1918 # Hashify HTML Blocks and "clean tags". 1919 # 1920 # We only want to do this for block-level HTML tags, such as headers, 1921 # lists, and tables. That's because we still want to wrap <p>s around 1922 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 1923 # phrase emphasis, and spans. The list of tags we're looking for is 1924 # hard-coded. 1925 # 1926 # This works by calling _HashHTMLBlocks_InMarkdown, which then calls 1927 # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 1928 # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back 1929 # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 1930 # These two functions are calling each other. It's recursive! 1931 # 1932 if ($this->no_markup) return $text; 1933 1934 # 1935 # Call the HTML-in-Markdown hasher. 1936 # 1937 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 1938 1939 return $text; 1940 } 1941 function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 1942 $enclosing_tag_re = '', $span = false) 1943 { 1944 # 1945 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 1946 # 1947 # * $indent is the number of space to be ignored when checking for code 1948 # blocks. This is important because if we don't take the indent into 1949 # account, something like this (which looks right) won't work as expected: 1950 # 1951 # <div> 1952 # <div markdown="1"> 1953 # Hello World. <-- Is this a Markdown code block or text? 1954 # </div> <-- Is this a Markdown code block or a real tag? 1955 # <div> 1956 # 1957 # If you don't like this, just don't indent the tag on which 1958 # you apply the markdown="1" attribute. 1959 # 1960 # * If $enclosing_tag_re is not empty, stops at the first unmatched closing 1961 # tag with that name. Nested tags supported. 1962 # 1963 # * If $span is true, text inside must treated as span. So any double 1964 # newline will be replaced by a single newline so that it does not create 1965 # paragraphs. 1966 # 1967 # Returns an array of that form: ( processed text , remaining text ) 1968 # 1969 if ($text === '') return array('', ''); 1970 1971 # Regex to check for the presense of newlines around a block tag. 1972 $newline_before_re = '/(?:^\n?|\n\n)*$/'; 1973 $newline_after_re = 1974 '{ 1975 ^ # Start of text following the tag. 1976 (?>[ ]*<!--.*?-->)? # Optional comment. 1977 [ ]*\n # Must be followed by newline. 1978 }xs'; 1979 1980 # Regex to match any tag. 1981 $block_tag_re = 1982 '{ 1983 ( # $2: Capture whole tag. 1984 </? # Any opening or closing tag. 1985 (?> # Tag name. 1986 '.$this->block_tags_re.' | 1987 '.$this->context_block_tags_re.' | 1988 '.$this->clean_tags_re.' | 1989 (?!\s)'.$enclosing_tag_re.' 1990 ) 1991 (?: 1992 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 1993 (?> 1994 ".*?" | # Double quotes (can contain `>`) 1995 \'.*?\' | # Single quotes (can contain `>`) 1996 .+? # Anything but quotes and `>`. 1997 )*? 1998 )? 1999 > # End of tag. 2000 | 2001 <!-- .*? --> # HTML Comment 2002 | 2003 <\?.*?\?> | <%.*?%> # Processing instruction 2004 | 2005 <!\[CDATA\[.*?\]\]> # CData Block 2006 '. ( !$span ? ' # If not in span. 2007 | 2008 # Indented code block 2009 (?: ^[ ]*\n | ^ | \n[ ]*\n ) 2010 [ ]{'.($indent+4).'}[^\n]* \n 2011 (?> 2012 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n 2013 )* 2014 | 2015 # Fenced code block marker 2016 (?<= ^ | \n ) 2017 [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,}) 2018 [ ]* 2019 (?: 2020 \.?[-_:a-zA-Z0-9]+ # standalone class name 2021 | 2022 '.$this->id_class_attr_nocatch_re.' # extra attributes 2023 )? 2024 [ ]* 2025 (?= \n ) 2026 ' : '' ). ' # End (if not is span). 2027 | 2028 # Code span marker 2029 # Note, this regex needs to go after backtick fenced 2030 # code blocks but it should also be kept outside of the 2031 # "if not in span" condition adding backticks to the parser 2032 `+ 2033 ) 2034 }xs'; 2035 2036 2037 $depth = 0; # Current depth inside the tag tree. 2038 $parsed = ""; # Parsed text that will be returned. 2039 2040 # 2041 # Loop through every tag until we find the closing tag of the parent 2042 # or loop until reaching the end of text if no parent tag specified. 2043 # 2044 do { 2045 # 2046 # Split the text using the first $tag_match pattern found. 2047 # Text before pattern will be first in the array, text after 2048 # pattern will be at the end, and between will be any catches made 2049 # by the pattern. 2050 # 2051 $parts = preg_split($block_tag_re, $text, 2, 2052 PREG_SPLIT_DELIM_CAPTURE); 2053 2054 # If in Markdown span mode, add a empty-string span-level hash 2055 # after each newline to prevent triggering any block element. 2056 if ($span) { 2057 $void = $this->hashPart("", ':'); 2058 $newline = "$void\n"; 2059 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 2060 } 2061 2062 $parsed .= $parts[0]; # Text before current tag. 2063 2064 # If end of $text has been reached. Stop loop. 2065 if (count($parts) < 3) { 2066 $text = ""; 2067 break; 2068 } 2069 2070 $tag = $parts[1]; # Tag to handle. 2071 $text = $parts[2]; # Remaining text after current tag. 2072 $tag_re = preg_quote($tag); # For use in a regular expression. 2073 2074 # 2075 # Check for: Fenced code block marker. 2076 # Note: need to recheck the whole tag to disambiguate backtick 2077 # fences from code spans 2078 # 2079 if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) { 2080 # Fenced code block marker: find matching end marker. 2081 $fence_indent = strlen($capture[1]); # use captured indent in re 2082 $fence_re = $capture[2]; # use captured fence in re 2083 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text, 2084 $matches)) 2085 { 2086 # End marker found: pass text unchanged until marker. 2087 $parsed .= $tag . $matches[0]; 2088 $text = substr($text, strlen($matches[0])); 2089 } 2090 else { 2091 # No end marker: just skip it. 2092 $parsed .= $tag; 2093 } 2094 } 2095 # 2096 # Check for: Indented code block. 2097 # 2098 else if ($tag{0} == "\n" || $tag{0} == " ") { 2099 # Indented code block: pass it unchanged, will be handled 2100 # later. 2101 $parsed .= $tag; 2102 } 2103 # 2104 # Check for: Code span marker 2105 # Note: need to check this after backtick fenced code blocks 2106 # 2107 else if ($tag{0} == "`") { 2108 # Find corresponding end marker. 2109 $tag_re = preg_quote($tag); 2110 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}', 2111 $text, $matches)) 2112 { 2113 # End marker found: pass text unchanged until marker. 2114 $parsed .= $tag . $matches[0]; 2115 $text = substr($text, strlen($matches[0])); 2116 } 2117 else { 2118 # Unmatched marker: just skip it. 2119 $parsed .= $tag; 2120 } 2121 } 2122 # 2123 # Check for: Opening Block level tag or 2124 # Opening Context Block tag (like ins and del) 2125 # used as a block tag (tag is alone on it's line). 2126 # 2127 else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) || 2128 ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) && 2129 preg_match($newline_before_re, $parsed) && 2130 preg_match($newline_after_re, $text) ) 2131 ) 2132 { 2133 # Need to parse tag and following text using the HTML parser. 2134 list($block_text, $text) = 2135 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 2136 2137 # Make sure it stays outside of any paragraph by adding newlines. 2138 $parsed .= "\n\n$block_text\n\n"; 2139 } 2140 # 2141 # Check for: Clean tag (like script, math) 2142 # HTML Comments, processing instructions. 2143 # 2144 else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) || 2145 $tag{1} == '!' || $tag{1} == '?') 2146 { 2147 # Need to parse tag and following text using the HTML parser. 2148 # (don't check for markdown attribute) 2149 list($block_text, $text) = 2150 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 2151 2152 $parsed .= $block_text; 2153 } 2154 # 2155 # Check for: Tag with same name as enclosing tag. 2156 # 2157 else if ($enclosing_tag_re !== '' && 2158 # Same name as enclosing tag. 2159 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag)) 2160 { 2161 # 2162 # Increase/decrease nested tag count. 2163 # 2164 if ($tag{1} == '/') $depth--; 2165 else if ($tag{strlen($tag)-2} != '/') $depth++; 2166 2167 if ($depth < 0) { 2168 # 2169 # Going out of parent element. Clean up and break so we 2170 # return to the calling function. 2171 # 2172 $text = $tag . $text; 2173 break; 2174 } 2175 2176 $parsed .= $tag; 2177 } 2178 else { 2179 $parsed .= $tag; 2180 } 2181 } while ($depth >= 0); 2182 2183 return array($parsed, $text); 2184 } 2185 function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 2186 # 2187 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 2188 # 2189 # * Calls $hash_method to convert any blocks. 2190 # * Stops when the first opening tag closes. 2191 # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 2192 # (it is not inside clean tags) 2193 # 2194 # Returns an array of that form: ( processed text , remaining text ) 2195 # 2196 if ($text === '') return array('', ''); 2197 2198 # Regex to match `markdown` attribute inside of a tag. 2199 $markdown_attr_re = ' 2200 { 2201 \s* # Eat whitespace before the `markdown` attribute 2202 markdown 2203 \s*=\s* 2204 (?> 2205 (["\']) # $1: quote delimiter 2206 (.*?) # $2: attribute value 2207 \1 # matching delimiter 2208 | 2209 ([^\s>]*) # $3: unquoted attribute value 2210 ) 2211 () # $4: make $3 always defined (avoid warnings) 2212 }xs'; 2213 2214 # Regex to match any tag. 2215 $tag_re = '{ 2216 ( # $2: Capture whole tag. 2217 </? # Any opening or closing tag. 2218 [\w:$]+ # Tag name. 2219 (?: 2220 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 2221 (?> 2222 ".*?" | # Double quotes (can contain `>`) 2223 \'.*?\' | # Single quotes (can contain `>`) 2224 .+? # Anything but quotes and `>`. 2225 )*? 2226 )? 2227 > # End of tag. 2228 | 2229 <!-- .*? --> # HTML Comment 2230 | 2231 <\?.*?\?> | <%.*?%> # Processing instruction 2232 | 2233 <!\[CDATA\[.*?\]\]> # CData Block 2234 ) 2235 }xs'; 2236 2237 $original_text = $text; # Save original text in case of faliure. 2238 2239 $depth = 0; # Current depth inside the tag tree. 2240 $block_text = ""; # Temporary text holder for current text. 2241 $parsed = ""; # Parsed text that will be returned. 2242 2243 # 2244 # Get the name of the starting tag. 2245 # (This pattern makes $base_tag_name_re safe without quoting.) 2246 # 2247 if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) 2248 $base_tag_name_re = $matches[1]; 2249 2250 # 2251 # Loop through every tag until we find the corresponding closing tag. 2252 # 2253 do { 2254 # 2255 # Split the text using the first $tag_match pattern found. 2256 # Text before pattern will be first in the array, text after 2257 # pattern will be at the end, and between will be any catches made 2258 # by the pattern. 2259 # 2260 $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 2261 2262 if (count($parts) < 3) { 2263 # 2264 # End of $text reached with unbalenced tag(s). 2265 # In that case, we return original text unchanged and pass the 2266 # first character as filtered to prevent an infinite loop in the 2267 # parent function. 2268 # 2269 return array($original_text{0}, substr($original_text, 1)); 2270 } 2271 2272 $block_text .= $parts[0]; # Text before current tag. 2273 $tag = $parts[1]; # Tag to handle. 2274 $text = $parts[2]; # Remaining text after current tag. 2275 2276 # 2277 # Check for: Auto-close tag (like <hr/>) 2278 # Comments and Processing Instructions. 2279 # 2280 if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) || 2281 $tag{1} == '!' || $tag{1} == '?') 2282 { 2283 # Just add the tag to the block as if it was text. 2284 $block_text .= $tag; 2285 } 2286 else { 2287 # 2288 # Increase/decrease nested tag count. Only do so if 2289 # the tag's name match base tag's. 2290 # 2291 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) { 2292 if ($tag{1} == '/') $depth--; 2293 else if ($tag{strlen($tag)-2} != '/') $depth++; 2294 } 2295 2296 # 2297 # Check for `markdown="1"` attribute and handle it. 2298 # 2299 if ($md_attr && 2300 preg_match($markdown_attr_re, $tag, $attr_m) && 2301 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 2302 { 2303 # Remove `markdown` attribute from opening tag. 2304 $tag = preg_replace($markdown_attr_re, '', $tag); 2305 2306 # Check if text inside this tag must be parsed in span mode. 2307 $this->mode = $attr_m[2] . $attr_m[3]; 2308 $span_mode = $this->mode == 'span' || $this->mode != 'block' && 2309 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag); 2310 2311 # Calculate indent before tag. 2312 if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { 2313 $strlen = $this->utf8_strlen; 2314 $indent = $strlen($matches[1], 'UTF-8'); 2315 } else { 2316 $indent = 0; 2317 } 2318 2319 # End preceding block with this tag. 2320 $block_text .= $tag; 2321 $parsed .= $this->$hash_method($block_text); 2322 2323 # Get enclosing tag name for the ParseMarkdown function. 2324 # (This pattern makes $tag_name_re safe without quoting.) 2325 preg_match('/^<([\w:$]*)\b/', $tag, $matches); 2326 $tag_name_re = $matches[1]; 2327 2328 # Parse the content using the HTML-in-Markdown parser. 2329 list ($block_text, $text) 2330 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 2331 $tag_name_re, $span_mode); 2332 2333 # Outdent markdown text. 2334 if ($indent > 0) { 2335 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 2336 $block_text); 2337 } 2338 2339 # Append tag content to parsed text. 2340 if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; 2341 else $parsed .= "$block_text"; 2342 2343 # Start over with a new block. 2344 $block_text = ""; 2345 } 2346 else $block_text .= $tag; 2347 } 2348 2349 } while ($depth > 0); 2350 2351 # 2352 # Hash last block text that wasn't processed inside the loop. 2353 # 2354 $parsed .= $this->$hash_method($block_text); 2355 2356 return array($parsed, $text); 2357 } 2358 2359 2360 function hashClean($text) { 2361 # 2362 # Called whenever a tag must be hashed when a function inserts a "clean" tag 2363 # in $text, it passes through this function and is automaticaly escaped, 2364 # blocking invalid nested overlap. 2365 # 2366 return $this->hashPart($text, 'C'); 2367 } 2368 2369 2370 function doAnchors($text) { 2371 # 2372 # Turn Markdown link shortcuts into XHTML <a> tags. 2373 # 2374 if ($this->in_anchor) return $text; 2375 $this->in_anchor = true; 2376 2377 # 2378 # First, handle reference-style links: [link text] [id] 2379 # 2380 $text = preg_replace_callback('{ 2381 ( # wrap whole match in $1 2382 \[ 2383 ('.$this->nested_brackets_re.') # link text = $2 2384 \] 2385 2386 [ ]? # one optional space 2387 (?:\n[ ]*)? # one optional newline followed by spaces 2388 2389 \[ 2390 (.*?) # id = $3 2391 \] 2392 ) 2393 }xs', 2394 array(&$this, '_doAnchors_reference_callback'), $text); 2395 2396 # 2397 # Next, inline-style links: [link text](url "optional title") 2398 # 2399 $text = preg_replace_callback('{ 2400 ( # wrap whole match in $1 2401 \[ 2402 ('.$this->nested_brackets_re.') # link text = $2 2403 \] 2404 \( # literal paren 2405 [ \n]* 2406 (?: 2407 <(.+?)> # href = $3 2408 | 2409 ('.$this->nested_url_parenthesis_re.') # href = $4 2410 ) 2411 [ \n]* 2412 ( # $5 2413 ([\'"]) # quote char = $6 2414 (.*?) # Title = $7 2415 \6 # matching quote 2416 [ \n]* # ignore any spaces/tabs between closing quote and ) 2417 )? # title is optional 2418 \) 2419 (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes 2420 ) 2421 }xs', 2422 array(&$this, '_doAnchors_inline_callback'), $text); 2423 2424 # 2425 # Last, handle reference-style shortcuts: [link text] 2426 # These must come last in case you've also got [link text][1] 2427 # or [link text](/foo) 2428 # 2429 $text = preg_replace_callback('{ 2430 ( # wrap whole match in $1 2431 \[ 2432 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 2433 \] 2434 ) 2435 }xs', 2436 array(&$this, '_doAnchors_reference_callback'), $text); 2437 2438 $this->in_anchor = false; 2439 return $text; 2440 } 2441 function _doAnchors_reference_callback($matches) { 2442 $whole_match = $matches[1]; 2443 $link_text = $matches[2]; 2444 $link_id =& $matches[3]; 2445 2446 if ($link_id == "") { 2447 # for shortcut links like [this][] or [this]. 2448 $link_id = $link_text; 2449 } 2450 2451 # lower-case and turn embedded newlines into spaces 2452 $link_id = strtolower($link_id); 2453 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 2454 2455 if (isset($this->urls[$link_id])) { 2456 $url = $this->urls[$link_id]; 2457 $url = $this->encodeAttribute($url); 2458 2459 $result = "<a href=\"$url\""; 2460 if ( isset( $this->titles[$link_id] ) ) { 2461 $title = $this->titles[$link_id]; 2462 $title = $this->encodeAttribute($title); 2463 $result .= " title=\"$title\""; 2464 } 2465 if (isset($this->ref_attr[$link_id])) 2466 $result .= $this->ref_attr[$link_id]; 2467 2468 $link_text = $this->runSpanGamut($link_text); 2469 $result .= ">$link_text</a>"; 2470 $result = $this->hashPart($result); 2471 } 2472 else { 2473 $result = $whole_match; 2474 } 2475 return $result; 2476 } 2477 function _doAnchors_inline_callback($matches) { 2478 $whole_match = $matches[1]; 2479 $link_text = $this->runSpanGamut($matches[2]); 2480 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 2481 $title =& $matches[7]; 2482 $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]); 2483 2484 2485 $url = $this->encodeAttribute($url); 2486 2487 $result = "<a href=\"$url\""; 2488 if (isset($title)) { 2489 $title = $this->encodeAttribute($title); 2490 $result .= " title=\"$title\""; 2491 } 2492 $result .= $attr; 2493 2494 $link_text = $this->runSpanGamut($link_text); 2495 $result .= ">$link_text</a>"; 2496 2497 return $this->hashPart($result); 2498 } 2499 2500 2501 function doImages($text) { 2502 # 2503 # Turn Markdown image shortcuts into <img> tags. 2504 # 2505 # 2506 # First, handle reference-style labeled images: ![alt text][id] 2507 # 2508 $text = preg_replace_callback('{ 2509 ( # wrap whole match in $1 2510 !\[ 2511 ('.$this->nested_brackets_re.') # alt text = $2 2512 \] 2513 2514 [ ]? # one optional space 2515 (?:\n[ ]*)? # one optional newline followed by spaces 2516 2517 \[ 2518 (.*?) # id = $3 2519 \] 2520 2521 ) 2522 }xs', 2523 array(&$this, '_doImages_reference_callback'), $text); 2524 2525 # 2526 # Next, handle inline images: ![alt text](url "optional title") 2527 # Don't forget: encode * and _ 2528 # 2529 $text = preg_replace_callback('{ 2530 ( # wrap whole match in $1 2531 !\[ 2532 ('.$this->nested_brackets_re.') # alt text = $2 2533 \] 2534 \s? # One optional whitespace character 2535 \( # literal paren 2536 [ \n]* 2537 (?: 2538 <(\S*)> # src url = $3 2539 | 2540 ('.$this->nested_url_parenthesis_re.') # src url = $4 2541 ) 2542 [ \n]* 2543 ( # $5 2544 ([\'"]) # quote char = $6 2545 (.*?) # title = $7 2546 \6 # matching quote 2547 [ \n]* 2548 )? # title is optional 2549 \) 2550 (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes 2551 ) 2552 }xs', 2553 array(&$this, '_doImages_inline_callback'), $text); 2554 2555 return $text; 2556 } 2557 function _doImages_reference_callback($matches) { 2558 $whole_match = $matches[1]; 2559 $alt_text = $matches[2]; 2560 $link_id = strtolower($matches[3]); 2561 2562 if ($link_id == "") { 2563 $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 2564 } 2565 2566 $alt_text = $this->encodeAttribute($alt_text); 2567 if (isset($this->urls[$link_id])) { 2568 $url = $this->encodeAttribute($this->urls[$link_id]); 2569 $result = "<img src=\"$url\" alt=\"$alt_text\""; 2570 if (isset($this->titles[$link_id])) { 2571 $title = $this->titles[$link_id]; 2572 $title = $this->encodeAttribute($title); 2573 $result .= " title=\"$title\""; 2574 } 2575 if (isset($this->ref_attr[$link_id])) 2576 $result .= $this->ref_attr[$link_id]; 2577 $result .= $this->empty_element_suffix; 2578 $result = $this->hashPart($result); 2579 } 2580 else { 2581 # If there's no such link ID, leave intact: 2582 $result = $whole_match; 2583 } 2584 2585 return $result; 2586 } 2587 function _doImages_inline_callback($matches) { 2588 $whole_match = $matches[1]; 2589 $alt_text = $matches[2]; 2590 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 2591 $title =& $matches[7]; 2592 $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); 2593 2594 $alt_text = $this->encodeAttribute($alt_text); 2595 $url = $this->encodeAttribute($url); 2596 $result = "<img src=\"$url\" alt=\"$alt_text\""; 2597 if (isset($title)) { 2598 $title = $this->encodeAttribute($title); 2599 $result .= " title=\"$title\""; # $title already quoted 2600 } 2601 $result .= $attr; 2602 $result .= $this->empty_element_suffix; 2603 2604 return $this->hashPart($result); 2605 } 2606 2607 2608 function doHeaders($text) { 2609 # 2610 # Redefined to add id and class attribute support. 2611 # 2612 # Setext-style headers: 2613 # Header 1 {#header1} 2614 # ======== 2615 # 2616 # Header 2 {#header2 .class1 .class2} 2617 # -------- 2618 # 2619 $text = preg_replace_callback( 2620 '{ 2621 (^.+?) # $1: Header text 2622 (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes 2623 [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer 2624 }mx', 2625 array(&$this, '_doHeaders_callback_setext'), $text); 2626 2627 # atx-style headers: 2628 # # Header 1 {#header1} 2629 # ## Header 2 {#header2} 2630 # ## Header 2 with closing hashes ## {#header3.class1.class2} 2631 # ... 2632 # ###### Header 6 {.class2} 2633 # 2634 $text = preg_replace_callback('{ 2635 ^(\#{1,6}) # $1 = string of #\'s 2636 [ ]* 2637 (.+?) # $2 = Header text 2638 [ ]* 2639 \#* # optional closing #\'s (not counted) 2640 (?:[ ]+ '.$this->id_class_attr_catch_re.' )? # $3 = id/class attributes 2641 [ ]* 2642 \n+ 2643 }xm', 2644 array(&$this, '_doHeaders_callback_atx'), $text); 2645 2646 return $text; 2647 } 2648 function _doHeaders_callback_setext($matches) { 2649 if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) 2650 return $matches[0]; 2651 $level = $matches[3]{0} == '=' ? 1 : 2; 2652 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2]); 2653 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>"; 2654 return "\n" . $this->hashBlock($block) . "\n\n"; 2655 } 2656 function _doHeaders_callback_atx($matches) { 2657 $level = strlen($matches[1]); 2658 $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[3]); 2659 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>"; 2660 return "\n" . $this->hashBlock($block) . "\n\n"; 2661 } 2662 2663 2664 function doTables($text) { 2665 # 2666 # Form HTML tables. 2667 # 2668 $less_than_tab = $this->tab_width - 1; 2669 # 2670 # Find tables with leading pipe. 2671 # 2672 # | Header 1 | Header 2 2673 # | -------- | -------- 2674 # | Cell 1 | Cell 2 2675 # | Cell 3 | Cell 4 2676 # 2677 $text = preg_replace_callback(' 2678 { 2679 ^ # Start of a line 2680 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2681 [|] # Optional leading pipe (present) 2682 (.+) \n # $1: Header row (at least one pipe) 2683 2684 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2685 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 2686 2687 ( # $3: Cells 2688 (?> 2689 [ ]* # Allowed whitespace. 2690 [|] .* \n # Row content. 2691 )* 2692 ) 2693 (?=\n|\Z) # Stop at final double newline. 2694 }xm', 2695 array(&$this, '_doTable_leadingPipe_callback'), $text); 2696 2697 # 2698 # Find tables without leading pipe. 2699 # 2700 # Header 1 | Header 2 2701 # -------- | -------- 2702 # Cell 1 | Cell 2 2703 # Cell 3 | Cell 4 2704 # 2705 $text = preg_replace_callback(' 2706 { 2707 ^ # Start of a line 2708 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2709 (\S.*[|].*) \n # $1: Header row (at least one pipe) 2710 2711 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2712 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 2713 2714 ( # $3: Cells 2715 (?> 2716 .* [|] .* \n # Row content 2717 )* 2718 ) 2719 (?=\n|\Z) # Stop at final double newline. 2720 }xm', 2721 array(&$this, '_DoTable_callback'), $text); 2722 2723 return $text; 2724 } 2725 function _doTable_leadingPipe_callback($matches) { 2726 $head = $matches[1]; 2727 $underline = $matches[2]; 2728 $content = $matches[3]; 2729 2730 # Remove leading pipe for each row. 2731 $content = preg_replace('/^ *[|]/m', '', $content); 2732 2733 return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 2734 } 2735 function _doTable_callback($matches) { 2736 $head = $matches[1]; 2737 $underline = $matches[2]; 2738 $content = $matches[3]; 2739 2740 # Remove any tailing pipes for each line. 2741 $head = preg_replace('/[|] *$/m', '', $head); 2742 $underline = preg_replace('/[|] *$/m', '', $underline); 2743 $content = preg_replace('/[|] *$/m', '', $content); 2744 2745 # Reading alignement from header underline. 2746 $separators = preg_split('/ *[|] */', $underline); 2747 foreach ($separators as $n => $s) { 2748 if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"'; 2749 else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"'; 2750 else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"'; 2751 else $attr[$n] = ''; 2752 } 2753 2754 # Parsing span elements, including code spans, character escapes, 2755 # and inline HTML tags, so that pipes inside those gets ignored. 2756 $head = $this->parseSpan($head); 2757 $headers = preg_split('/ *[|] */', $head); 2758 $col_count = count($headers); 2759 $attr = array_pad($attr, $col_count, ''); 2760 2761 # Write column headers. 2762 $text = "<table>\n"; 2763 $text .= "<thead>\n"; 2764 $text .= "<tr>\n"; 2765 foreach ($headers as $n => $header) 2766 $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n"; 2767 $text .= "</tr>\n"; 2768 $text .= "</thead>\n"; 2769 2770 # Split content by row. 2771 $rows = explode("\n", trim($content, "\n")); 2772 2773 $text .= "<tbody>\n"; 2774 foreach ($rows as $row) { 2775 # Parsing span elements, including code spans, character escapes, 2776 # and inline HTML tags, so that pipes inside those gets ignored. 2777 $row = $this->parseSpan($row); 2778 2779 # Split row by cell. 2780 $row_cells = preg_split('/ *[|] */', $row, $col_count); 2781 $row_cells = array_pad($row_cells, $col_count, ''); 2782 2783 $text .= "<tr>\n"; 2784 foreach ($row_cells as $n => $cell) 2785 $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n"; 2786 $text .= "</tr>\n"; 2787 } 2788 $text .= "</tbody>\n"; 2789 $text .= "</table>"; 2790 2791 return $this->hashBlock($text) . "\n"; 2792 } 2793 2794 2795 function doDefLists($text) { 2796 # 2797 # Form HTML definition lists. 2798 # 2799 $less_than_tab = $this->tab_width - 1; 2800 2801 # Re-usable pattern to match any entire dl list: 2802 $whole_list_re = '(?> 2803 ( # $1 = whole list 2804 ( # $2 2805 [ ]{0,'.$less_than_tab.'} 2806 ((?>.*\S.*\n)+) # $3 = defined term 2807 \n? 2808 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2809 ) 2810 (?s:.+?) 2811 ( # $4 2812 \z 2813 | 2814 \n{2,} 2815 (?=\S) 2816 (?! # Negative lookahead for another term 2817 [ ]{0,'.$less_than_tab.'} 2818 (?: \S.*\n )+? # defined term 2819 \n? 2820 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2821 ) 2822 (?! # Negative lookahead for another definition 2823 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2824 ) 2825 ) 2826 ) 2827 )'; // mx 2828 2829 $text = preg_replace_callback('{ 2830 (?>\A\n?|(?<=\n\n)) 2831 '.$whole_list_re.' 2832 }mx', 2833 array(&$this, '_doDefLists_callback'), $text); 2834 2835 return $text; 2836 } 2837 function _doDefLists_callback($matches) { 2838 # Re-usable patterns to match list item bullets and number markers: 2839 $list = $matches[1]; 2840 2841 # Turn double returns into triple returns, so that we can make a 2842 # paragraph for the last item in a list, if necessary: 2843 $result = trim($this->processDefListItems($list)); 2844 $result = "<dl>\n" . $result . "\n</dl>"; 2845 return $this->hashBlock($result) . "\n\n"; 2846 } 2847 2848 2849 function processDefListItems($list_str) { 2850 # 2851 # Process the contents of a single definition list, splitting it 2852 # into individual term and definition list items. 2853 # 2854 $less_than_tab = $this->tab_width - 1; 2855 2856 # trim trailing blank lines: 2857 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 2858 2859 # Process definition terms. 2860 $list_str = preg_replace_callback('{ 2861 (?>\A\n?|\n\n+) # leading line 2862 ( # definition terms = $1 2863 [ ]{0,'.$less_than_tab.'} # leading whitespace 2864 (?!\:[ ]|[ ]) # negative lookahead for a definition 2865 # mark (colon) or more whitespace. 2866 (?> \S.* \n)+? # actual term (not whitespace). 2867 ) 2868 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 2869 # with a definition mark. 2870 }xm', 2871 array(&$this, '_processDefListItems_callback_dt'), $list_str); 2872 2873 # Process actual definitions. 2874 $list_str = preg_replace_callback('{ 2875 \n(\n+)? # leading line = $1 2876 ( # marker space = $2 2877 [ ]{0,'.$less_than_tab.'} # whitespace before colon 2878 \:[ ]+ # definition mark (colon) 2879 ) 2880 ((?s:.+?)) # definition text = $3 2881 (?= \n+ # stop at next definition mark, 2882 (?: # next term or end of text 2883 [ ]{0,'.$less_than_tab.'} \:[ ] | 2884 <dt> | \z 2885 ) 2886 ) 2887 }xm', 2888 array(&$this, '_processDefListItems_callback_dd'), $list_str); 2889 2890 return $list_str; 2891 } 2892 function _processDefListItems_callback_dt($matches) { 2893 $terms = explode("\n", trim($matches[1])); 2894 $text = ''; 2895 foreach ($terms as $term) { 2896 $term = $this->runSpanGamut(trim($term)); 2897 $text .= "\n<dt>" . $term . "</dt>"; 2898 } 2899 return $text . "\n"; 2900 } 2901 function _processDefListItems_callback_dd($matches) { 2902 $leading_line = $matches[1]; 2903 $marker_space = $matches[2]; 2904 $def = $matches[3]; 2905 2906 if ($leading_line || preg_match('/\n{2,}/', $def)) { 2907 # Replace marker with the appropriate whitespace indentation 2908 $def = str_repeat(' ', strlen($marker_space)) . $def; 2909 $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 2910 $def = "\n". $def ."\n"; 2911 } 2912 else { 2913 $def = rtrim($def); 2914 $def = $this->runSpanGamut($this->outdent($def)); 2915 } 2916 2917 return "\n<dd>" . $def . "</dd>\n"; 2918 } 2919 2920 2921 function doFencedCodeBlocks($text) { 2922 # 2923 # Adding the fenced code block syntax to regular Markdown: 2924 # 2925 # ~~~ 2926 # Code block 2927 # ~~~ 2928 # 2929 $less_than_tab = $this->tab_width; 2930 2931 $text = preg_replace_callback('{ 2932 (?:\n|\A) 2933 # 1: Opening marker 2934 ( 2935 (?:~{3,}|`{3,}) # 3 or more tildes/backticks. 2936 ) 2937 [ ]* 2938 (?: 2939 \.?([-_:a-zA-Z0-9]+) # 2: standalone class name 2940 | 2941 '.$this->id_class_attr_catch_re.' # 3: Extra attributes 2942 )? 2943 [ ]* \n # Whitespace and newline following marker. 2944 2945 # 4: Content 2946 ( 2947 (?> 2948 (?!\1 [ ]* \n) # Not a closing marker. 2949 .*\n+ 2950 )+ 2951 ) 2952 2953 # Closing marker. 2954 \1 [ ]* (?= \n ) 2955 }xm', 2956 array(&$this, '_doFencedCodeBlocks_callback'), $text); 2957 2958 return $text; 2959 } 2960 function _doFencedCodeBlocks_callback($matches) { 2961 $classname =& $matches[2]; 2962 $attrs =& $matches[3]; 2963 $codeblock = $matches[4]; 2964 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 2965 $codeblock = preg_replace_callback('/^\n+/', 2966 array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock); 2967 2968 if ($classname != "") { 2969 if ($classname{0} == '.') 2970 $classname = substr($classname, 1); 2971 $attr_str = ' class="'.$this->code_class_prefix.$classname.'"'; 2972 } else { 2973 $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs); 2974 } 2975 $pre_attr_str = $this->code_attr_on_pre ? $attr_str : ''; 2976 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str; 2977 $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>"; 2978 2979 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 2980 } 2981 function _doFencedCodeBlocks_newlines($matches) { 2982 return str_repeat("<br$this->empty_element_suffix", 2983 strlen($matches[0])); 2984 } 2985 2986 2987 # 2988 # Redefining emphasis markers so that emphasis by underscore does not 2989 # work in the middle of a word. 2990 # 2991 var $em_relist = array( 2992 '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)', 2993 '*' => '(?<=\S|^)(?<!\*)\*(?!\*)', 2994 '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])', 2995 ); 2996 var $strong_relist = array( 2997 '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)', 2998 '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)', 2999 '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])', 3000 ); 3001 var $em_strong_relist = array( 3002 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)', 3003 '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)', 3004 '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])', 3005 ); 3006 3007 3008 function formParagraphs($text) { 3009 # 3010 # Params: 3011 # $text - string to process with html <p> tags 3012 # 3013 # Strip leading and trailing lines: 3014 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 3015 3016 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 3017 3018 # 3019 # Wrap <p> tags and unhashify HTML blocks 3020 # 3021 foreach ($grafs as $key => $value) { 3022 $value = trim($this->runSpanGamut($value)); 3023 3024 # Check if this should be enclosed in a paragraph. 3025 # Clean tag hashes & block tag hashes are left alone. 3026 $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); 3027 3028 if ($is_p) { 3029 $value = "<p>$value</p>"; 3030 } 3031 $grafs[$key] = $value; 3032 } 3033 3034 # Join grafs in one text, then unhash HTML tags. 3035 $text = implode("\n\n", $grafs); 3036 3037 # Finish by removing any tag hashes still present in $text. 3038 $text = $this->unhash($text); 3039 3040 return $text; 3041 } 3042 3043 3044 ### Footnotes 3045 3046 function stripFootnotes($text) { 3047 # 3048 # Strips link definitions from text, stores the URLs and titles in 3049 # hash references. 3050 # 3051 $less_than_tab = $this->tab_width - 1; 3052 3053 # Link defs are in the form: [^id]: url "optional title" 3054 $text = preg_replace_callback('{ 3055 ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1 3056 [ ]* 3057 \n? # maybe *one* newline 3058 ( # text = $2 (no blank lines allowed) 3059 (?: 3060 .+ # actual text 3061 | 3062 \n # newlines but 3063 (?!\[\^.+?\]:\s)# negative lookahead for footnote marker. 3064 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 3065 # by non-indented content 3066 )* 3067 ) 3068 }xm', 3069 array(&$this, '_stripFootnotes_callback'), 3070 $text); 3071 return $text; 3072 } 3073 function _stripFootnotes_callback($matches) { 3074 $note_id = $this->fn_id_prefix . $matches[1]; 3075 $this->footnotes[$note_id] = $this->outdent($matches[2]); 3076 return ''; # String that will replace the block 3077 } 3078 3079 3080 function doFootnotes($text) { 3081 # 3082 # Replace footnote references in $text [^id] with a special text-token 3083 # which will be replaced by the actual footnote marker in appendFootnotes. 3084 # 3085 if (!$this->in_anchor) { 3086 $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); 3087 } 3088 return $text; 3089 } 3090 3091 3092 function appendFootnotes($text) { 3093 # 3094 # Append footnote list to text. 3095 # 3096 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 3097 array(&$this, '_appendFootnotes_callback'), $text); 3098 3099 if (!empty($this->footnotes_ordered)) { 3100 $text .= "\n\n"; 3101 $text .= "<div class=\"footnotes\">\n"; 3102 $text .= "<hr". $this->empty_element_suffix ."\n"; 3103 $text .= "<ol>\n\n"; 3104 3105 $attr = " rev=\"footnote\""; 3106 if ($this->fn_backlink_class != "") { 3107 $class = $this->fn_backlink_class; 3108 $class = $this->encodeAttribute($class); 3109 $attr .= " class=\"$class\""; 3110 } 3111 if ($this->fn_backlink_title != "") { 3112 $title = $this->fn_backlink_title; 3113 $title = $this->encodeAttribute($title); 3114 $attr .= " title=\"$title\""; 3115 } 3116 $num = 0; 3117 3118 while (!empty($this->footnotes_ordered)) { 3119 $footnote = reset($this->footnotes_ordered); 3120 $note_id = key($this->footnotes_ordered); 3121 unset($this->footnotes_ordered[$note_id]); 3122 $ref_count = $this->footnotes_ref_count[$note_id]; 3123 unset($this->footnotes_ref_count[$note_id]); 3124 unset($this->footnotes[$note_id]); 3125 3126 $footnote .= "\n"; # Need to append newline before parsing. 3127 $footnote = $this->runBlockGamut("$footnote\n"); 3128 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 3129 array(&$this, '_appendFootnotes_callback'), $footnote); 3130 3131 $attr = str_replace("%%", ++$num, $attr); 3132 $note_id = $this->encodeAttribute($note_id); 3133 3134 # Prepare backlink, multiple backlinks if multiple references 3135 $backlink = "<a href=\"#fnref:$note_id\"$attr>↩</a>"; 3136 for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) { 3137 $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>↩</a>"; 3138 } 3139 # Add backlink to last paragraph; create new paragraph if needed. 3140 if (preg_match('{</p>$}', $footnote)) { 3141 $footnote = substr($footnote, 0, -4) . " $backlink</p>"; 3142 } else { 3143 $footnote .= "\n\n<p>$backlink</p>"; 3144 } 3145 3146 $text .= "<li id=\"fn:$note_id\">\n"; 3147 $text .= $footnote . "\n"; 3148 $text .= "</li>\n\n"; 3149 } 3150 3151 $text .= "</ol>\n"; 3152 $text .= "</div>"; 3153 } 3154 return $text; 3155 } 3156 function _appendFootnotes_callback($matches) { 3157 $node_id = $this->fn_id_prefix . $matches[1]; 3158 3159 # Create footnote marker only if it has a corresponding footnote *and* 3160 # the footnote hasn't been used by another marker. 3161 if (isset($this->footnotes[$node_id])) { 3162 $num =& $this->footnotes_numbers[$node_id]; 3163 if (!isset($num)) { 3164 # Transfer footnote content to the ordered list and give it its 3165 # number 3166 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; 3167 $this->footnotes_ref_count[$node_id] = 1; 3168 $num = $this->footnote_counter++; 3169 $ref_count_mark = ''; 3170 } else { 3171 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1; 3172 } 3173 3174 $attr = " rel=\"footnote\""; 3175 if ($this->fn_link_class != "") { 3176 $class = $this->fn_link_class; 3177 $class = $this->encodeAttribute($class); 3178 $attr .= " class=\"$class\""; 3179 } 3180 if ($this->fn_link_title != "") { 3181 $title = $this->fn_link_title; 3182 $title = $this->encodeAttribute($title); 3183 $attr .= " title=\"$title\""; 3184 } 3185 3186 $attr = str_replace("%%", $num, $attr); 3187 $node_id = $this->encodeAttribute($node_id); 3188 3189 return 3190 "<sup id=\"fnref$ref_count_mark:$node_id\">". 3191 "<a href=\"#fn:$node_id\"$attr>$num</a>". 3192 "</sup>"; 3193 } 3194 3195 return "[^".$matches[1]."]"; 3196 } 3197 3198 3199 ### Abbreviations ### 3200 3201 function stripAbbreviations($text) { 3202 # 3203 # Strips abbreviations from text, stores titles in hash references. 3204 # 3205 $less_than_tab = $this->tab_width - 1; 3206 3207 # Link defs are in the form: [id]*: url "optional title" 3208 $text = preg_replace_callback('{ 3209 ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1 3210 (.*) # text = $2 (no blank lines allowed) 3211 }xm', 3212 array(&$this, '_stripAbbreviations_callback'), 3213 $text); 3214 return $text; 3215 } 3216 function _stripAbbreviations_callback($matches) { 3217 $abbr_word = $matches[1]; 3218 $abbr_desc = $matches[2]; 3219 if ($this->abbr_word_re) 3220 $this->abbr_word_re .= '|'; 3221 $this->abbr_word_re .= preg_quote($abbr_word); 3222 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 3223 return ''; # String that will replace the block 3224 } 3225 3226 3227 function doAbbreviations($text) { 3228 # 3229 # Find defined abbreviations in text and wrap them in <abbr> elements. 3230 # 3231 if ($this->abbr_word_re) { 3232 // cannot use the /x modifier because abbr_word_re may 3233 // contain significant spaces: 3234 $text = preg_replace_callback('{'. 3235 '(?<![\w\x1A])'. 3236 '(?:'.$this->abbr_word_re.')'. 3237 '(?![\w\x1A])'. 3238 '}', 3239 array(&$this, '_doAbbreviations_callback'), $text); 3240 } 3241 return $text; 3242 } 3243 function _doAbbreviations_callback($matches) { 3244 $abbr = $matches[0]; 3245 if (isset($this->abbr_desciptions[$abbr])) { 3246 $desc = $this->abbr_desciptions[$abbr]; 3247 if (empty($desc)) { 3248 return $this->hashPart("<abbr>$abbr</abbr>"); 3249 } else { 3250 $desc = $this->encodeAttribute($desc); 3251 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>"); 3252 } 3253 } else { 3254 return $matches[0]; 3255 } 3256 } 3257 3258} 3259 3260 3261/* 3262 3263PHP Markdown Extra 3264================== 3265 3266Description 3267----------- 3268 3269This is a PHP port of the original Markdown formatter written in Perl 3270by John Gruber. This special "Extra" version of PHP Markdown features 3271further enhancements to the syntax for making additional constructs 3272such as tables and definition list. 3273 3274Markdown is a text-to-HTML filter; it translates an easy-to-read / 3275easy-to-write structured text format into HTML. Markdown's text format 3276is mostly similar to that of plain text email, and supports features such 3277as headers, *emphasis*, code blocks, blockquotes, and links. 3278 3279Markdown's syntax is designed not as a generic markup language, but 3280specifically to serve as a front-end to (X)HTML. You can use span-level 3281HTML tags anywhere in a Markdown document, and you can use block level 3282HTML tags (like <div> and <table> as well). 3283 3284For more information about Markdown's syntax, see: 3285 3286<http://daringfireball.net/projects/markdown/> 3287 3288 3289Bugs 3290---- 3291 3292To file bug reports please send email to: 3293 3294<michel.fortin@michelf.ca> 3295 3296Please include with your report: (1) the example input; (2) the output you 3297expected; (3) the output Markdown actually produced. 3298 3299 3300Version History 3301--------------- 3302 3303See the readme file for detailed release notes for this version. 3304 3305 3306Copyright and License 3307--------------------- 3308 3309PHP Markdown & Extra 3310Copyright (c) 2004-2013 Michel Fortin 3311<http://michelf.ca/> 3312All rights reserved. 3313 3314Based on Markdown 3315Copyright (c) 2003-2006 John Gruber 3316<http://daringfireball.net/> 3317All rights reserved. 3318 3319Redistribution and use in source and binary forms, with or without 3320modification, are permitted provided that the following conditions are 3321met: 3322 3323* Redistributions of source code must retain the above copyright notice, 3324 this list of conditions and the following disclaimer. 3325 3326* Redistributions in binary form must reproduce the above copyright 3327 notice, this list of conditions and the following disclaimer in the 3328 documentation and/or other materials provided with the distribution. 3329 3330* Neither the name "Markdown" nor the names of its contributors may 3331 be used to endorse or promote products derived from this software 3332 without specific prior written permission. 3333 3334This software is provided by the copyright holders and contributors "as 3335is" and any express or implied warranties, including, but not limited 3336to, the implied warranties of merchantability and fitness for a 3337particular purpose are disclaimed. In no event shall the copyright owner 3338or contributors be liable for any direct, indirect, incidental, special, 3339exemplary, or consequential damages (including, but not limited to, 3340procurement of substitute goods or services; loss of use, data, or 3341profits; or business interruption) however caused and on any theory of 3342liability, whether in contract, strict liability, or tort (including 3343negligence or otherwise) arising in any way out of the use of this 3344software, even if advised of the possibility of such damage. 3345 3346*/ 3347?> 3348