<?php

#
#
# Parsedown
# http://parsedown.org
#
# (c) Emanuil Rusev
# http://erusev.com
#
# For the full license information, view the LICENSE file that was distributed
# with this source code.
#
#

/**
 * Markdown-to-HTML converter.
 *
 * Parsing happens in two passes: lines() groups input lines into block
 * structures (dispatching on the first non-space character of each line via
 * $BlockTypes), and line() converts inline syntax inside block text
 * (dispatching on marker characters via $InlineTypes). Blocks and inlines are
 * plain arrays that are finally rendered by element().
 */
class Parsedown
{
    # ~

    const version = '1.7.4';

    # ~

    /**
     * Converts a Markdown document to HTML.
     *
     * @param string $text Markdown source
     *
     * @return string HTML markup without leading / trailing line breaks
     */
    public function text($text)
    {
        # make sure no definitions are set
        $this->DefinitionData = array();

        # standardize line breaks
        $text = str_replace(array("\r\n", "\r"), "\n", $text);

        # remove surrounding line breaks
        $text = trim($text, "\n");

        # split text into lines
        $lines = explode("\n", $text);

        # iterate through lines to identify blocks
        $markup = $this->lines($lines);

        # trim line breaks
        $markup = trim($markup, "\n");

        return $markup;
    }

    #
    # Setters
    #

    /**
     * When enabled, every line break inside text is rendered as <br />.
     *
     * @param bool $breaksEnabled
     *
     * @return $this
     */
    public function setBreaksEnabled($breaksEnabled)
    {
        $this->breaksEnabled = $breaksEnabled;

        return $this;
    }

    protected $breaksEnabled;

    /**
     * When enabled, HTML comments and markup in the input are not passed
     * through as raw HTML (see blockComment, blockMarkup, inlineMarkup).
     *
     * @param bool $markupEscaped
     *
     * @return $this
     */
    public function setMarkupEscaped($markupEscaped)
    {
        $this->markupEscaped = $markupEscaped;

        return $this;
    }

    protected $markupEscaped;

    /**
     * Controls whether bare http(s) URLs are turned into links (see inlineUrl).
     *
     * @param bool $urlsLinked
     *
     * @return $this
     */
    public function setUrlsLinked($urlsLinked)
    {
        $this->urlsLinked = $urlsLinked;

        return $this;
    }

    protected $urlsLinked = true;

    /**
     * When enabled, raw HTML is not passed through and every rendered element
     * goes through sanitiseElement().
     *
     * @param bool $safeMode
     *
     * @return $this
     */
    public function setSafeMode($safeMode)
    {
        $this->safeMode = (bool) $safeMode;

        return $this;
    }

    protected $safeMode;

    # URL prefixes that filterUnsafeUrlInAttribute() leaves untouched
    protected $safeLinksWhitelist = array(
        'http://',
        'https://',
        'ftp://',
        'ftps://',
        'mailto:',
        'data:image/png;base64,',
        'data:image/gif;base64,',
        'data:image/jpeg;base64,',
        'irc:',
        'ircs:',
        'git:',
        'ssh:',
        'news:',
        'steam:',
    );

    #
    # Lines
    #

    # maps the first non-space character of a line to the block types that
    # are tried (in order) for that line
    protected $BlockTypes = array(
        '#' => array('Header'),
        '*' => array('Rule', 'List'),
        '+' => array('List'),
        '-' => array('SetextHeader', 'Table', 'Rule', 'List'),
        '0' => array('List'),
        '1' => array('List'),
        '2' => array('List'),
        '3' => array('List'),
        '4' => array('List'),
        '5' => array('List'),
        '6' => array('List'),
        '7' => array('List'),
        '8' => array('List'),
        '9' => array('List'),
        ':' => array('Table'),
        '<' => array('Comment', 'Markup'),
        '=' => array('SetextHeader'),
        '>' => array('Quote'),
        '[' => array('Reference'),
        '_' => array('Rule'),
        '`' => array('FencedCode'),
        '|' => array('Table'),
        '~' => array('FencedCode'),
    );

    # ~

    # block types that are tried for every line, before the marker-specific ones
    protected $unmarkedBlockTypes = array(
        'Code',
    );

    #
    # Blocks
    #

    /**
     * Groups lines into blocks and renders them.
     *
     * @param array $lines lines without trailing line breaks
     *
     * @return string markup in which every block is preceded by "\n" and
     *                which ends with "\n"
     */
    protected function lines(array $lines)
    {
        $CurrentBlock = null;

        # the first entry pushed is always the initial (null) $CurrentBlock;
        # it is dropped again after the loop
        $Blocks = array();

        foreach ($lines as $line)
        {
            # blank line: only flag the current block as interrupted
            if (chop($line) === '')
            {
                if (isset($CurrentBlock))
                {
                    $CurrentBlock['interrupted'] = true;
                }

                continue;
            }

            # expand tabs to the next multiple of four columns
            if (strpos($line, "\t") !== false)
            {
                $parts = explode("\t", $line);

                $line = $parts[0];

                unset($parts[0]);

                foreach ($parts as $part)
                {
                    $shortage = 4 - mb_strlen($line, 'utf-8') % 4;

                    $line .= str_repeat(' ', $shortage);
                    $line .= $part;
                }
            }

            $indent = 0;

            while (isset($line[$indent]) and $line[$indent] === ' ')
            {
                $indent ++;
            }

            $text = $indent > 0 ? substr($line, $indent) : $line;

            # ~

            $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);

            # ~

            # give the current block the chance to consume the line
            if (isset($CurrentBlock['continuable']))
            {
                $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $CurrentBlock = $Block;

                    continue;
                }
                else
                {
                    if ($this->isBlockCompletable($CurrentBlock['type']))
                    {
                        $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
                    }
                }
            }

            # ~

            $marker = $text[0];

            # ~

            $blockTypes = $this->unmarkedBlockTypes;

            if (isset($this->BlockTypes[$marker]))
            {
                foreach ($this->BlockTypes[$marker] as $blockType)
                {
                    $blockTypes []= $blockType;
                }
            }

            #
            # ~

            foreach ($blockTypes as $blockType)
            {
                $Block = $this->{'block'.$blockType}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $Block['type'] = $blockType;

                    # a block that is already "identified" replaces the
                    # current block instead of following it
                    if ( ! isset($Block['identified']))
                    {
                        $Blocks []= $CurrentBlock;

                        $Block['identified'] = true;
                    }

                    if ($this->isBlockContinuable($blockType))
                    {
                        $Block['continuable'] = true;
                    }

                    $CurrentBlock = $Block;

                    continue 2;
                }
            }

            # ~

            # no block type matched: extend the current paragraph (a block
            # without a type) or start a new one
            if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted']))
            {
                $CurrentBlock['element']['text'] .= "\n".$text;
            }
            else
            {
                $Blocks []= $CurrentBlock;

                $CurrentBlock = $this->paragraph($Line);

                $CurrentBlock['identified'] = true;
            }
        }

        # ~

        if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type']))
        {
            $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
        }

        # ~

        $Blocks []= $CurrentBlock;

        # drop the initial null placeholder
        unset($Blocks[0]);

        # ~

        $markup = '';

        foreach ($Blocks as $Block)
        {
            if (isset($Block['hidden']))
            {
                continue;
            }

            $markup .= "\n";
            $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']);
        }

        $markup .= "\n";

        # ~

        return $markup;
    }

    /**
     * Tells whether a block<Type>Continue method exists.
     */
    protected function isBlockContinuable($Type)
    {
        return method_exists($this, 'block'.$Type.'Continue');
    }

    /**
     * Tells whether a block<Type>Complete method exists.
     */
    protected function isBlockCompletable($Type)
    {
        return method_exists($this, 'block'.$Type.'Complete');
    }

    #
    # Code

    /**
     * Starts an indented code block when the line is indented by four or
     * more spaces and does not directly follow an uninterrupted paragraph.
     */
    protected function blockCode($Line, $Block = null)
    {
        if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted']))
        {
            return;
        }

        if ($Line['indent'] >= 4)
        {
            $text = substr($Line['body'], 4);

            $Block = array(
                'element' => array(
                    'name' => 'pre',
                    'handler' => 'element',
                    'text' => array(
                        'name' => 'code',
                        'text' => $text,
                    ),
                ),
            );

            return $Block;
        }
    }

    /**
     * Appends another indented line to an indented code block.
     */
    protected function blockCodeContinue($Line, $Block)
    {
        if ($Line['indent'] >= 4)
        {
            if (isset($Block['interrupted']))
            {
                $Block['element']['text']['text'] .= "\n";

                unset($Block['interrupted']);
            }

            $Block['element']['text']['text'] .= "\n";

            $text = substr($Line['body'], 4);

            $Block['element']['text']['text'] .= $text;

            return $Block;
        }
    }

    /**
     * Completion hook for indented code blocks; returns the block unchanged.
     */
    protected function blockCodeComplete($Block)
    {
        return $Block;
    }

    #
    # Comment

    /**
     * Starts an HTML comment block on a line beginning with "<!--".
     */
    protected function blockComment($Line)
    {
        if ($this->markupEscaped or $this->safeMode)
        {
            return;
        }

        if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!')
        {
            $Block = array(
                'markup' => $Line['body'],
            );

            if (preg_match('/-->$/', $Line['text']))
            {
                $Block['closed'] = true;
            }

            return $Block;
        }
    }

    /**
     * Appends lines to an HTML comment block until a line ends with "-->".
     */
    protected function blockCommentContinue($Line, array $Block)
    {
        if (isset($Block['closed']))
        {
            return;
        }

        $Block['markup'] .= "\n" . $Line['body'];

        if (preg_match('/-->$/', $Line['text']))
        {
            $Block['closed'] = true;
        }

        return $Block;
    }

    #
    # Fenced Code

    /**
     * Starts a fenced code block on a line of three or more fence characters,
     * optionally followed by an info string whose first word becomes the
     * "language-..." class.
     */
    protected function blockFencedCode($Line)
    {
        if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches))
        {
            $Element = array(
                'name' => 'code',
                'text' => '',
            );

            if (isset($matches[1]))
            {
                /**
                 * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
                 * Every HTML element may have a class attribute specified.
                 * The attribute, if specified, must have a value that is a set
                 * of space-separated tokens representing the various classes
                 * that the element belongs to.
                 * [...]
                 * The space characters, for the purposes of this specification,
                 * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
                 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and
                 * U+000D CARRIAGE RETURN (CR).
                 */
                $language = substr($matches[1], 0, strcspn($matches[1], " \t\n\f\r"));

                $class = 'language-'.$language;

                $Element['attributes'] = array(
                    'class' => $class,
                );
            }

            $Block = array(
                'char' => $Line['text'][0],
                'element' => array(
                    'name' => 'pre',
                    'handler' => 'element',
                    'text' => $Element,
                ),
            );

            return $Block;
        }
    }

    /**
     * Appends lines to a fenced code block until the closing fence is found.
     */
    protected function blockFencedCodeContinue($Line, $Block)
    {
        if (isset($Block['complete']))
        {
            return;
        }

        if (isset($Block['interrupted']))
        {
            $Block['element']['text']['text'] .= "\n";

            unset($Block['interrupted']);
        }

        if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']))
        {
            # strip the line break that was prepended to the first content line
            $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1);

            $Block['complete'] = true;

            return $Block;
        }

        $Block['element']['text']['text'] .= "\n".$Line['body'];

        return $Block;
    }

    /**
     * Completion hook for fenced code blocks; returns the block unchanged.
     */
    protected function blockFencedCodeComplete($Block)
    {
        return $Block;
    }

    #
    # Header

    /**
     * Builds an ATX header (h1-h6) from a line starting with one to six "#".
     */
    protected function blockHeader($Line)
    {
        if (isset($Line['text'][1]))
        {
            $level = 1;

            while (isset($Line['text'][$level]) and $Line['text'][$level] === '#')
            {
                $level ++;
            }

            if ($level > 6)
            {
                return;
            }

            $text = trim($Line['text'], '# ');

            $Block = array(
                'element' => array(
                    # $level is between 1 and 6 here because of the guard above
                    'name' => 'h' . $level,
                    'text' => $text,
                    'handler' => 'line',
                ),
            );

            return $Block;
        }
    }

    #
    # List

    /**
     * Starts an unordered ("*", "+", "-") or ordered ("1.") list.
     */
    protected function blockList($Line)
    {
        # the list markers "*", "+" and "-" all compare <= "-"; digits do not
        list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]');

        if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches))
        {
            $Block = array(
                'indent' => $Line['indent'],
                'pattern' => $pattern,
                'element' => array(
                    'name' => $name,
                    'handler' => 'elements',
                ),
            );

            if($name === 'ol')
            {
                $listStart = stristr($matches[0], '.', true);

                if($listStart !== '1')
                {
                    $Block['element']['attributes'] = array('start' => $listStart);
                }
            }

            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $matches[2],
                ),
            );

            # 'li' aliases the last list item so that continuation lines can
            # be appended to it
            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }
    }

    /**
     * Adds a new item to the list or appends the line to the current item.
     */
    protected function blockListContinue($Line, array $Block)
    {
        if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches))
        {
            if (isset($Block['interrupted']))
            {
                $Block['li']['text'] []= '';

                $Block['loose'] = true;

                unset($Block['interrupted']);
            }

            # break the alias to the previous item before creating a new one
            unset($Block['li']);

            $text = isset($matches[1]) ? $matches[1] : '';

            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $text,
                ),
            );

            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }

        if ($Line['text'][0] === '[' and $this->blockReference($Line))
        {
            return $Block;
        }

        if ( ! isset($Block['interrupted']))
        {
            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            return $Block;
        }

        if ($Line['indent'] > 0)
        {
            $Block['li']['text'] []= '';

            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            unset($Block['interrupted']);

            return $Block;
        }
    }

    /**
     * For loose lists, makes sure every item ends with an empty line (li()
     * keeps the <p> wrapper for items that contain an empty line).
     */
    protected function blockListComplete(array $Block)
    {
        if (isset($Block['loose']))
        {
            foreach ($Block['element']['text'] as &$li)
            {
                if (end($li['text']) !== '')
                {
                    $li['text'] []= '';
                }
            }

            # drop the loop reference so it cannot alias the last item
            unset($li);
        }

        return $Block;
    }

    #
    # Quote

    /**
     * Starts a blockquote on a line beginning with ">".
     */
    protected function blockQuote($Line)
    {
        if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
        {
            $Block = array(
                'element' => array(
                    'name' => 'blockquote',
                    'handler' => 'lines',
                    'text' => (array) $matches[1],
                ),
            );

            return $Block;
        }
    }

    /**
     * Appends quoted lines, and unquoted lines that directly follow, to a
     * blockquote.
     */
    protected function blockQuoteContinue($Line, array $Block)
    {
        if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
        {
            if (isset($Block['interrupted']))
            {
                $Block['element']['text'] []= '';

                unset($Block['interrupted']);
            }

            $Block['element']['text'] []= $matches[1];

            return $Block;
        }

        if ( ! isset($Block['interrupted']))
        {
            $Block['element']['text'] []= $Line['text'];

            return $Block;
        }
    }

    #
    # Rule

    /**
     * Builds a horizontal rule from three or more identical marker
     * characters, optionally separated by spaces.
     */
    protected function blockRule($Line)
    {
        if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text']))
        {
            $Block = array(
                'element' => array(
                    'name' => 'hr'
                ),
            );

            return $Block;
        }
    }

    #
    # Setext

    /**
     * Turns the preceding uninterrupted paragraph into an h1 ("=") or h2
     * ("-") when the line consists only of that character.
     *
     * NOTE(review): the signature is kept as-is for compatibility with
     * subclasses that override it.
     */
    protected function blockSetextHeader($Line, array $Block = null)
    {
        if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
        {
            return;
        }

        if (chop($Line['text'], $Line['text'][0]) === '')
        {
            $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';

            return $Block;
        }
    }

    #
    # Markup

    /**
     * Starts a raw HTML block on a line beginning with a block-level tag.
     */
    protected function blockMarkup($Line)
    {
        if ($this->markupEscaped or $this->safeMode)
        {
            return;
        }

        if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
        {
            # tag names are compared case-insensitively
            $element = strtolower($matches[1]);

            if (in_array($element, $this->textLevelElements))
            {
                return;
            }

            $Block = array(
                'name' => $matches[1],
                'depth' => 0,
                'markup' => $Line['text'],
            );

            $length = strlen($matches[0]);

            $remainder = substr($Line['text'], $length);

            # use the normalised name so that e.g. <HR> is recognised as void
            # just like <hr>; otherwise the block would never be closed and
            # would swallow the lines that follow
            $isVoid = (isset($matches[2]) || in_array($element, $this->voidElements));

            if (trim($remainder) === '')
            {
                if ($isVoid)
                {
                    $Block['closed'] = true;

                    $Block['void'] = true;
                }
            }
            else
            {
                if ($isVoid)
                {
                    return;
                }

                if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
                {
                    $Block['closed'] = true;
                }
            }

            return $Block;
        }
    }

    /**
     * Appends lines to a raw HTML block until the matching closing tag is
     * found, tracking nested tags of the same name.
     */
    protected function blockMarkupContinue($Line, array $Block)
    {
        if (isset($Block['closed']))
        {
            return;
        }

        if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
        {
            $Block['depth'] ++;
        }

        if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
        {
            if ($Block['depth'] > 0)
            {
                $Block['depth'] --;
            }
            else
            {
                $Block['closed'] = true;
            }
        }

        if (isset($Block['interrupted']))
        {
            $Block['markup'] .= "\n";

            unset($Block['interrupted']);
        }

        $Block['markup'] .= "\n".$Line['body'];

        return $Block;
    }

    #
    # Reference

    /**
     * Stores a link reference definition ("[id]: url "title"") and returns a
     * hidden block.
     */
    protected function blockReference($Line)
    {
        if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
        {
            $id = strtolower($matches[1]);

            $Data = array(
                'url' => $matches[2],
                'title' => null,
            );

            if (isset($matches[3]))
            {
                $Data['title'] = $matches[3];
            }

            $this->DefinitionData['Reference'][$id] = $Data;

            $Block = array(
                'hidden' => true,
            );

            return $Block;
        }
    }

    #
    # Table

    /**
     * Turns the preceding uninterrupted paragraph into a table header when
     * the line is a divider row (only spaces, "-", ":" and "|").
     */
    protected function blockTable($Line, array $Block = null)
    {
        if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
        {
            return;
        }

        if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '')
        {
            $alignments = array();

            $divider = $Line['text'];

            $divider = trim($divider);
            $divider = trim($divider, '|');

            $dividerCells = explode('|', $divider);

            foreach ($dividerCells as $dividerCell)
            {
                $dividerCell = trim($dividerCell);

                if ($dividerCell === '')
                {
                    continue;
                }

                $alignment = null;

                if ($dividerCell[0] === ':')
                {
                    $alignment = 'left';
                }

                if (substr($dividerCell, - 1) === ':')
                {
                    $alignment = $alignment === 'left' ? 'center' : 'right';
                }

                $alignments []= $alignment;
            }

            # ~

            $HeaderElements = array();

            $header = $Block['element']['text'];

            $header = trim($header);
            $header = trim($header, '|');

            $headerCells = explode('|', $header);

            foreach ($headerCells as $index => $headerCell)
            {
                $headerCell = trim($headerCell);

                $HeaderElement = array(
                    'name' => 'th',
                    'text' => $headerCell,
                    'handler' => 'line',
                );

                if (isset($alignments[$index]))
                {
                    $alignment = $alignments[$index];

                    $HeaderElement['attributes'] = array(
                        'style' => 'text-align: '.$alignment.';',
                    );
                }

                $HeaderElements []= $HeaderElement;
            }

            # ~

            $Block = array(
                'alignments' => $alignments,
                'identified' => true,
                'element' => array(
                    'name' => 'table',
                    'handler' => 'elements',
                ),
            );

            $Block['element']['text'] []= array(
                'name' => 'thead',
                'handler' => 'elements',
            );

            $Block['element']['text'] []= array(
                'name' => 'tbody',
                'handler' => 'elements',
                'text' => array(),
            );

            $Block['element']['text'][0]['text'] []= array(
                'name' => 'tr',
                'handler' => 'elements',
                'text' => $HeaderElements,
            );

            return $Block;
        }
    }

    /**
     * Appends a body row to a table for every following line that contains
     * a "|".
     */
    protected function blockTableContinue($Line, array $Block)
    {
        if (isset($Block['interrupted']))
        {
            return;
        }

        if ($Line['text'][0] === '|' or strpos($Line['text'], '|'))
        {
            $Elements = array();

            $row = $Line['text'];

            $row = trim($row);
            $row = trim($row, '|');

            # split on "|" but keep escaped pipes and pipes inside code spans
            preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches);

            foreach ($matches[0] as $index => $cell)
            {
                $cell = trim($cell);

                $Element = array(
                    'name' => 'td',
                    'handler' => 'line',
                    'text' => $cell,
                );

                if (isset($Block['alignments'][$index]))
                {
                    $Element['attributes'] = array(
                        'style' => 'text-align: '.$Block['alignments'][$index].';',
                    );
                }

                $Elements []= $Element;
            }

            $Element = array(
                'name' => 'tr',
                'handler' => 'elements',
                'text' => $Elements,
            );

            $Block['element']['text'][1]['text'] []= $Element;

            return $Block;
        }
    }

    #
    # ~
    #

    /**
     * Builds a paragraph block from a line.
     */
    protected function paragraph($Line)
    {
        $Block = array(
            'element' => array(
                'name' => 'p',
                'text' => $Line['text'],
                'handler' => 'line',
            ),
        );

        return $Block;
    }

    #
    # Inline Elements
    #

    # maps a marker character to the inline types that are tried (in order)
    protected $InlineTypes = array(
        '"' => array('SpecialCharacter'),
        '!' => array('Image'),
        '&' => array('SpecialCharacter'),
        '*' => array('Emphasis'),
        ':' => array('Url'),
        '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'),
        '>' => array('SpecialCharacter'),
        '[' => array('Link'),
        '_' => array('Emphasis'),
        '`' => array('Code'),
        '~' => array('Strikethrough'),
        '\\' => array('EscapeSequence'),
    );

    # ~

    protected $inlineMarkerList = '!"*_&[:<>`~\\';

    #
    # ~
    #

    /**
     * Converts inline Markdown in a piece of text to HTML.
     *
     * @param string $text
     * @param array  $nonNestables inline types that must not be applied
     *
     * @return string
     */
    public function line($text, $nonNestables=array())
    {
        $markup = '';

        # $excerpt is based on the first occurrence of a marker

        while ($excerpt = strpbrk($text, $this->inlineMarkerList))
        {
            $marker = $excerpt[0];

            $markerPosition = strpos($text, $marker);

            $Excerpt = array('text' => $excerpt, 'context' => $text);

            foreach ($this->InlineTypes[$marker] as $inlineType)
            {
                # check to see if the current inline type is nestable in the current context

                if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables))
                {
                    continue;
                }

                $Inline = $this->{'inline'.$inlineType}($Excerpt);

                if ( ! isset($Inline))
                {
                    continue;
                }

                # makes sure that the inline belongs to "our" marker

                if (isset($Inline['position']) and $Inline['position'] > $markerPosition)
                {
                    continue;
                }

                # sets a default inline position

                if ( ! isset($Inline['position']))
                {
                    $Inline['position'] = $markerPosition;
                }

                # cause the new element to 'inherit' our non nestables

                foreach ($nonNestables as $non_nestable)
                {
                    $Inline['element']['nonNestables'][] = $non_nestable;
                }

                # the text that comes before the inline
                $unmarkedText = substr($text, 0, $Inline['position']);

                # compile the unmarked text
                $markup .= $this->unmarkedText($unmarkedText);

                # compile the inline
                $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']);

                # remove the examined text
                $text = substr($text, $Inline['position'] + $Inline['extent']);

                continue 2;
            }

            # the marker does not belong to an inline

            $unmarkedText = substr($text, 0, $markerPosition + 1);

            $markup .= $this->unmarkedText($unmarkedText);

            $text = substr($text, $markerPosition + 1);
        }

        $markup .= $this->unmarkedText($text);

        return $markup;
    }

    #
    # ~
    #

    /**
     * Matches a code span delimited by equal runs of the marker character.
     */
    protected function inlineCode($Excerpt)
    {
        $marker = $Excerpt['text'][0];

        if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
        {
            $text = $matches[2];
            $text = preg_replace("/[ ]*\n/", ' ', $text);

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'code',
                    'text' => $text,
                ),
            );
        }
    }

    /**
     * Matches an e-mail autolink such as <user@example.com>.
     */
    protected function inlineEmailTag($Excerpt)
    {
        if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches))
        {
            $url = $matches[1];

            if ( ! isset($matches[2]))
            {
                $url = 'mailto:' . $url;
            }

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'a',
                    'text' => $matches[1],
                    'attributes' => array(
                        'href' => $url,
                    ),
                ),
            );
        }
    }

    /**
     * Matches strong ("**" / "__") or regular ("*" / "_") emphasis.
     */
    protected function inlineEmphasis($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1]))
        {
            return;
        }

        $marker = $Excerpt['text'][0];

        if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
        {
            $emphasis = 'strong';
        }
        elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
        {
            $emphasis = 'em';
        }
        else
        {
            return;
        }

        return array(
            'extent' => strlen($matches[0]),
            'element' => array(
                'name' => $emphasis,
                'handler' => 'line',
                'text' => $matches[1],
            ),
        );
    }

    /**
     * Replaces a backslash followed by a special character with that
     * character.
     */
    protected function inlineEscapeSequence($Excerpt)
    {
        if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters))
        {
            return array(
                'markup' => $Excerpt['text'][1],
                'extent' => 2,
            );
        }
    }

    /**
     * Matches an image ("![alt](src)") by parsing the part after "!" as a
     * link and converting the result.
     */
    protected function inlineImage($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[')
        {
            return;
        }

        $Excerpt['text']= substr($Excerpt['text'], 1);

        $Link = $this->inlineLink($Excerpt);

        if ($Link === null)
        {
            return;
        }

        $Inline = array(
            # + 1 accounts for the leading "!"
            'extent' => $Link['extent'] + 1,
            'element' => array(
                'name' => 'img',
                'attributes' => array(
                    'src' => $Link['element']['attributes']['href'],
                    'alt' => $Link['element']['text'],
                ),
            ),
        );

        $Inline['element']['attributes'] += $Link['element']['attributes'];

        unset($Inline['element']['attributes']['href']);

        return $Inline;
    }

    /**
     * Matches an inline link ("[text](url "title")") or a reference link
     * ("[text][id]", "[text][]", "[text]").
     */
    protected function inlineLink($Excerpt)
    {
        $Element = array(
            'name' => 'a',
            'handler' => 'line',
            'nonNestables' => array('Url', 'Link'),
            'text' => null,
            'attributes' => array(
                'href' => null,
                'title' => null,
            ),
        );

        $extent = 0;

        $remainder = $Excerpt['text'];

        if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches))
        {
            $Element['text'] = $matches[1];

            $extent += strlen($matches[0]);

            $remainder = substr($remainder, $extent);
        }
        else
        {
            return;
        }

        if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches))
        {
            $Element['attributes']['href'] = $matches[1];

            if (isset($matches[2]))
            {
                $Element['attributes']['title'] = substr($matches[2], 1, - 1);
            }

            $extent += strlen($matches[0]);
        }
        else
        {
            if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
            {
                $definition = strlen($matches[1]) ? $matches[1] : $Element['text'];
                $definition = strtolower($definition);

                $extent += strlen($matches[0]);
            }
            else
            {
                $definition = strtolower($Element['text']);
            }

            if ( ! isset($this->DefinitionData['Reference'][$definition]))
            {
                return;
            }

            $Definition = $this->DefinitionData['Reference'][$definition];

            $Element['attributes']['href'] = $Definition['url'];
            $Element['attributes']['title'] = $Definition['title'];
        }

        return array(
            'extent' => $extent,
            'element' => $Element,
        );
    }

    /**
     * Passes through inline HTML: closing tags, comments and opening tags.
     */
    protected function inlineMarkup($Excerpt)
    {
        if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false)
        {
            return;
        }

        if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches))
        {
            return array(
                'markup' => $matches[0],
                'extent' => strlen($matches[0]),
            );
        }

        if ($Excerpt['text'][1] === '!' and preg_match('/^<!---?[^>-](?:-?[^-])*-->/s', $Excerpt['text'], $matches))
        {
            return array(
                'markup' => $matches[0],
                'extent' => strlen($matches[0]),
            );
        }

        if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches))
        {
            return array(
                'markup' => $matches[0],
                'extent' => strlen($matches[0]),
            );
        }
    }

    /**
     * Escapes "&" (unless it starts an entity), ">", "<" and '"'.
     */
    protected function inlineSpecialCharacter($Excerpt)
    {
        if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text']))
        {
            return array(
                'markup' => '&amp;',
                'extent' => 1,
            );
        }

        $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot');

        if (isset($SpecialCharacter[$Excerpt['text'][0]]))
        {
            return array(
                'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';',
                'extent' => 1,
            );
        }
    }

    /**
     * Matches strikethrough text ("~~text~~").
     */
    protected function inlineStrikethrough($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1]))
        {
            return;
        }

        if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches))
        {
            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'del',
                    'text' => $matches[1],
                    'handler' => 'line',
                ),
            );
        }
    }

    /**
     * Matches a bare http(s) URL. The marker is the ":" of the scheme, so the
     * URL is searched for in the whole context and its position is reported
     * explicitly.
     */
    protected function inlineUrl($Excerpt)
    {
        if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
        {
            return;
        }

        if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
        {
            $url = $matches[0][0];

            $Inline = array(
                'extent' => strlen($matches[0][0]),
                'position' => $matches[0][1],
                'element' => array(
                    'name' => 'a',
                    'text' => $url,
                    'attributes' => array(
                        'href' => $url,
                    ),
                ),
            );

            return $Inline;
        }
    }

    /**
     * Matches a URL autolink such as <http://example.com>.
     */
    protected function inlineUrlTag($Excerpt)
    {
        if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
        {
            $url = $matches[1];

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'a',
                    'text' => $url,
                    'attributes' => array(
                        'href' => $url,
                    ),
                ),
            );
        }
    }

    # ~

    /**
     * Converts line breaks in plain text to <br /> according to the
     * breaksEnabled setting.
     */
    protected function unmarkedText($text)
    {
        if ($this->breaksEnabled)
        {
            $text = preg_replace('/[ ]*\n/', "<br />\n", $text);
        }
        else
        {
            $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);
            $text = str_replace(" \n", "\n", $text);
        }

        return $text;
    }

    #
    # Handlers
    #

    /**
     * Renders an element array ('name', optional 'attributes', 'text' or
     * 'rawHtml', optional 'handler') as HTML.
     */
    protected function element(array $Element)
    {
        if ($this->safeMode)
        {
            $Element = $this->sanitiseElement($Element);
        }

        $markup = '<'.$Element['name'];

        if (isset($Element['attributes']))
        {
            foreach ($Element['attributes'] as $name => $value)
            {
                if ($value === null)
                {
                    continue;
                }

                $markup .= ' '.$name.'="'.self::escape($value).'"';
            }
        }

        $permitRawHtml = false;

        if (isset($Element['text']))
        {
            $text = $Element['text'];
        }
        // very strongly consider an alternative if you're writing an
        // extension
        elseif (isset($Element['rawHtml']))
        {
            $text = $Element['rawHtml'];
            $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode'];
            $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode;
        }

        if (isset($text))
        {
            $markup .= '>';

            if (!isset($Element['nonNestables']))
            {
                $Element['nonNestables'] = array();
            }

            if (isset($Element['handler']))
            {
                $markup .= $this->{$Element['handler']}($text, $Element['nonNestables']);
            }
            elseif (!$permitRawHtml)
            {
                $markup .= self::escape($text, true);
            }
            else
            {
                $markup .= $text;
            }

            $markup .= '</'.$Element['name'].'>';
        }
        else
        {
            $markup .= ' />';
        }

        return $markup;
    }

    /**
     * Renders a list of elements, each preceded by a line break.
     */
    protected function elements(array $Elements)
    {
        $markup = '';

        foreach ($Elements as $Element)
        {
            $markup .= "\n" . $this->element($Element);
        }

        $markup .= "\n";

        return $markup;
    }

    # ~

    /**
     * Renders the lines of a list item; the <p> wrapper of the first
     * paragraph is removed unless the item contains an empty line.
     */
    protected function li($lines)
    {
        $markup = $this->lines($lines);

        $trimmedMarkup = trim($markup);

        if ( ! in_array('', $lines) and substr($trimmedMarkup, 0, 3) === '<p>')
        {
            $markup = $trimmedMarkup;
            $markup = substr($markup, 3);

            $position = strpos($markup, "</p>");

            $markup = substr_replace($markup, '', $position, 4);
        }

        return $markup;
    }

    #
    # Deprecated Methods
    #

    /**
     * Alias of text().
     */
    public function parse($text)
    {
        $markup = $this->text($text);

        return $markup;
    }

    /**
     * Safe-mode filter: neutralises unsafe URLs in a[href] / img[src] and
     * removes attributes with malformed names or names starting with "on".
     */
    protected function sanitiseElement(array $Element)
    {
        static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
        static $safeUrlNameToAtt  = array(
            'a'   => 'href',
            'img' => 'src',
        );

        if (isset($safeUrlNameToAtt[$Element['name']]))
        {
            $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]);
        }

        if ( ! empty($Element['attributes']))
        {
            foreach ($Element['attributes'] as $att => $val)
            {
                # filter out badly parsed attribute
                if ( ! preg_match($goodAttribute, $att))
                {
                    unset($Element['attributes'][$att]);
                }
                # dump onevent attribute
                elseif (self::striAtStart($att, 'on'))
                {
                    unset($Element['attributes'][$att]);
                }
            }
        }

        return $Element;
    }

    /**
     * Leaves the attribute untouched when its value starts with a
     * whitelisted prefix; otherwise replaces every ":" with "%3A".
     */
    protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
    {
        foreach ($this->safeLinksWhitelist as $scheme)
        {
            if (self::striAtStart($Element['attributes'][$attribute], $scheme))
            {
                return $Element;
            }
        }

        $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]);

        return $Element;
    }

    #
    # Static Methods
    #

    /**
     * HTML-escapes text; quotes are left alone when $allowQuotes is true.
     */
    protected static function escape($text, $allowQuotes = false)
    {
        return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8');
    }

    /**
     * Case-insensitive "starts with" check.
     */
    protected static function striAtStart($string, $needle)
    {
        $len = strlen($needle);

        if ($len > strlen($string))
        {
            return false;
        }
        else
        {
            return strtolower(substr($string, 0, $len)) === strtolower($needle);
        }
    }

    /**
     * Returns the instance registered under $name, creating it on first use.
     *
     * @param string $name
     *
     * @return static
     */
    public static function instance($name = 'default')
    {
        if (isset(self::$instances[$name]))
        {
            return self::$instances[$name];
        }

        $instance = new static();

        self::$instances[$name] = $instance;

        return $instance;
    }

    private static $instances = array();

    #
    # Fields
    #

    protected $DefinitionData;

    #
    # Read-Only

    protected $specialCharacters = array(
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|',
    );

    protected $StrongRegex = array(
        '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
        '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
    );

    protected $EmRegex = array(
        '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
        '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
    );

    protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?';

    protected $voidElements = array(
        'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
    );

    protected $textLevelElements = array(
        'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
        'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
        'i', 'rp', 'del', 'code',          'strike', 'marquee',
        'q', 'rt', 'ins', 'font',          'strong',
        's', 'tt', 'kbd', 'mark',
        'u', 'xm', 'sub', 'nobr',
                   'sup', 'ruby',
                   'var', 'span',
                   'wbr', 'time',
    );
}