1 <?php
2 
3 #
4 #
5 # Parsedown
6 # http://parsedown.org
7 #
8 # (c) Emanuil Rusev
9 # http://erusev.com
10 #
11 # For the full license information, view the LICENSE file that was distributed
12 # with this source code.
13 #
14 #
15 
16 class Parsedown
17 {
18     # ~
19 
20     const version = '1.7.4';
21 
22     # ~
23 
24     function text($text)
25     {
26         # make sure no definitions are set
27         $this->DefinitionData = array();
28 
29         # standardize line breaks
30         $text = str_replace(array("\r\n", "\r"), "\n", $text);
31 
32         # remove surrounding line breaks
33         $text = trim($text, "\n");
34 
35         # split text into lines
36         $lines = explode("\n", $text);
37 
38         # iterate through lines to identify blocks
39         $markup = $this->lines($lines);
40 
41         # trim line breaks
42         $markup = trim($markup, "\n");
43 
44         return $markup;
45     }
46 
47     #
48     # Setters
49     #
50 
51     function setBreaksEnabled($breaksEnabled)
52     {
53         $this->breaksEnabled = $breaksEnabled;
54 
55         return $this;
56     }
57 
58     protected $breaksEnabled;
59 
60     function setMarkupEscaped($markupEscaped)
61     {
62         $this->markupEscaped = $markupEscaped;
63 
64         return $this;
65     }
66 
67     protected $markupEscaped;
68 
69     function setUrlsLinked($urlsLinked)
70     {
71         $this->urlsLinked = $urlsLinked;
72 
73         return $this;
74     }
75 
76     protected $urlsLinked = true;
77 
78     function setSafeMode($safeMode)
79     {
80         $this->safeMode = (bool) $safeMode;
81 
82         return $this;
83     }
84 
85     protected $safeMode;
86 
    # URL prefixes (schemes, and for data: URLs the exact image media types)
    # on the safe-link whitelist.
    # NOTE(review): the code that consults this list is outside this section —
    # presumably it is applied to link targets when safe mode is on; confirm.
    protected $safeLinksWhitelist = array(
        'http://',
        'https://',
        'ftp://',
        'ftps://',
        'mailto:',
        'data:image/png;base64,',
        'data:image/gif;base64,',
        'data:image/jpeg;base64,',
        'irc:',
        'ircs:',
        'git:',
        'ssh:',
        'news:',
        'steam:',
    );
103 
104     #
105     # Lines
106     #
107 
    # Maps the first character of a line's text (after leading spaces) to the
    # block types tried for that line. lines() calls `block<Type>()` for each
    # entry in the order listed and keeps the first block returned, so the
    # order within an entry decides precedence (e.g. for "-": setext header,
    # then table, then rule, then list).
    protected $BlockTypes = array(
        '#' => array('Header'),
        '*' => array('Rule', 'List'),
        '+' => array('List'),
        '-' => array('SetextHeader', 'Table', 'Rule', 'List'),
        '0' => array('List'),
        '1' => array('List'),
        '2' => array('List'),
        '3' => array('List'),
        '4' => array('List'),
        '5' => array('List'),
        '6' => array('List'),
        '7' => array('List'),
        '8' => array('List'),
        '9' => array('List'),
        ':' => array('Table'),
        '<' => array('Comment', 'Markup'),
        '=' => array('SetextHeader'),
        '>' => array('Quote'),
        '[' => array('Reference'),
        '_' => array('Rule'),
        '`' => array('FencedCode'),
        '|' => array('Table'),
        '~' => array('FencedCode'),
    );
133 
134     # ~
135 
    # Block types that are not tied to a marker character: lines() tries them
    # on every line, before the marker-specific types from $BlockTypes.
    protected $unmarkedBlockTypes = array(
        'Code',
    );
139 
140     #
141     # Blocks
142     #
143 
    /**
     * Groups lines into blocks and renders those blocks to markup.
     *
     * Every non-blank line is offered, in this order, to:
     *  1. the current block's `block<Type>Continue()` method, if that block
     *     is continuable;
     *  2. the `block<Type>()` methods for $unmarkedBlockTypes and for the
     *     line's first character (see $BlockTypes);
     *  3. the current paragraph (appended as text), or a new paragraph.
     *
     * @param array $lines lines to parse
     * @return string markup of every visible block, each preceded by "\n",
     *                followed by one trailing "\n"
     */
    protected function lines(array $lines)
    {
        $CurrentBlock = null;

        foreach ($lines as $line)
        {
            # blank line: flag the current block as interrupted and move on
            if (chop($line) === '')
            {
                if (isset($CurrentBlock))
                {
                    $CurrentBlock['interrupted'] = true;
                }

                continue;
            }

            # expand each tab to the spaces needed to reach the next multiple-of-4 column
            if (strpos($line, "\t") !== false)
            {
                $parts = explode("\t", $line);

                $line = $parts[0];

                unset($parts[0]);

                foreach ($parts as $part)
                {
                    # width is measured in characters, not bytes
                    $shortage = 4 - mb_strlen($line, 'utf-8') % 4;

                    $line .= str_repeat(' ', $shortage);
                    $line .= $part;
                }
            }

            # count the leading spaces
            $indent = 0;

            while (isset($line[$indent]) and $line[$indent] === ' ')
            {
                $indent ++;
            }

            $text = $indent > 0 ? substr($line, $indent) : $line;

            # 'body' keeps the indentation, 'text' has it removed

            $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);

            # first give the current block the chance to absorb the line

            if (isset($CurrentBlock['continuable']))
            {
                $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $CurrentBlock = $Block;

                    continue;
                }
                else
                {
                    # the block refused the line, so it has ended
                    if ($this->isBlockCompletable($CurrentBlock['type']))
                    {
                        $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
                    }
                }
            }

            # the first character of the text selects the candidate block types
            # ($text is never empty here because blank lines were skipped above)

            $marker = $text[0];

            # unmarked types are always tried first

            $blockTypes = $this->unmarkedBlockTypes;

            if (isset($this->BlockTypes[$marker]))
            {
                foreach ($this->BlockTypes[$marker] as $blockType)
                {
                    $blockTypes []= $blockType;
                }
            }

            #
            # the first candidate that returns a block wins

            foreach ($blockTypes as $blockType)
            {
                $Block = $this->{'block'.$blockType}($Line, $CurrentBlock);

                if (isset($Block))
                {
                    $Block['type'] = $blockType;

                    # a block that is already 'identified' replaces the current
                    # block instead of following it, so the current block is
                    # only stored when the new one is not identified yet
                    if ( ! isset($Block['identified']))
                    {
                        $Blocks []= $CurrentBlock;

                        $Block['identified'] = true;
                    }

                    if ($this->isBlockContinuable($blockType))
                    {
                        $Block['continuable'] = true;
                    }

                    $CurrentBlock = $Block;

                    continue 2;
                }
            }

            # no block type claimed the line: it is paragraph text
            # (paragraphs are the blocks that carry no 'type')

            if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted']))
            {
                $CurrentBlock['element']['text'] .= "\n".$text;
            }
            else
            {
                $Blocks []= $CurrentBlock;

                $CurrentBlock = $this->paragraph($Line);

                $CurrentBlock['identified'] = true;
            }
        }

        # the input ended while a continuable block was still open

        if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type']))
        {
            $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
        }

        # store the last block; index 0 holds the initial null $CurrentBlock
        # pushed when the first block was stored, so it is discarded

        $Blocks []= $CurrentBlock;

        unset($Blocks[0]);

        # render

        $markup = '';

        foreach ($Blocks as $Block)
        {
            # hidden blocks (e.g. reference definitions) produce no output
            if (isset($Block['hidden']))
            {
                continue;
            }

            $markup .= "\n";
            $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']);
        }

        $markup .= "\n";

        # ~

        return $markup;
    }
306 
307     protected function isBlockContinuable($Type)
308     {
309         return method_exists($this, 'block'.$Type.'Continue');
310     }
311 
312     protected function isBlockCompletable($Type)
313     {
314         return method_exists($this, 'block'.$Type.'Complete');
315     }
316 
317     #
318     # Code
319 
320     protected function blockCode($Line, $Block = null)
321     {
322         if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted']))
323         {
324             return;
325         }
326 
327         if ($Line['indent'] >= 4)
328         {
329             $text = substr($Line['body'], 4);
330 
331             $Block = array(
332                 'element' => array(
333                     'name' => 'pre',
334                     'handler' => 'element',
335                     'text' => array(
336                         'name' => 'code',
337                         'text' => $text,
338                     ),
339                 ),
340             );
341 
342             return $Block;
343         }
344     }
345 
346     protected function blockCodeContinue($Line, $Block)
347     {
348         if ($Line['indent'] >= 4)
349         {
350             if (isset($Block['interrupted']))
351             {
352                 $Block['element']['text']['text'] .= "\n";
353 
354                 unset($Block['interrupted']);
355             }
356 
357             $Block['element']['text']['text'] .= "\n";
358 
359             $text = substr($Line['body'], 4);
360 
361             $Block['element']['text']['text'] .= $text;
362 
363             return $Block;
364         }
365     }
366 
367     protected function blockCodeComplete($Block)
368     {
369         $text = $Block['element']['text']['text'];
370 
371         $Block['element']['text']['text'] = $text;
372 
373         return $Block;
374     }
375 
376     #
377     # Comment
378 
379     protected function blockComment($Line)
380     {
381         if ($this->markupEscaped or $this->safeMode)
382         {
383             return;
384         }
385 
386         if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!')
387         {
388             $Block = array(
389                 'markup' => $Line['body'],
390             );
391 
392             if (preg_match('/-->$/', $Line['text']))
393             {
394                 $Block['closed'] = true;
395             }
396 
397             return $Block;
398         }
399     }
400 
401     protected function blockCommentContinue($Line, array $Block)
402     {
403         if (isset($Block['closed']))
404         {
405             return;
406         }
407 
408         $Block['markup'] .= "\n" . $Line['body'];
409 
410         if (preg_match('/-->$/', $Line['text']))
411         {
412             $Block['closed'] = true;
413         }
414 
415         return $Block;
416     }
417 
418     #
419     # Fenced Code
420 
421     protected function blockFencedCode($Line)
422     {
423         if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches))
424         {
425             $Element = array(
426                 'name' => 'code',
427                 'text' => '',
428             );
429 
430             if (isset($matches[1]))
431             {
432                 /**
433                  * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
434                  * Every HTML element may have a class attribute specified.
435                  * The attribute, if specified, must have a value that is a set
436                  * of space-separated tokens representing the various classes
437                  * that the element belongs to.
438                  * [...]
439                  * The space characters, for the purposes of this specification,
440                  * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
441                  * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and
442                  * U+000D CARRIAGE RETURN (CR).
443                  */
444                 $language = substr($matches[1], 0, strcspn($matches[1], " \t\n\f\r"));
445 
446                 $class = 'language-'.$language;
447 
448                 $Element['attributes'] = array(
449                     'class' => $class,
450                 );
451             }
452 
453             $Block = array(
454                 'char' => $Line['text'][0],
455                 'element' => array(
456                     'name' => 'pre',
457                     'handler' => 'element',
458                     'text' => $Element,
459                 ),
460             );
461 
462             return $Block;
463         }
464     }
465 
466     protected function blockFencedCodeContinue($Line, $Block)
467     {
468         if (isset($Block['complete']))
469         {
470             return;
471         }
472 
473         if (isset($Block['interrupted']))
474         {
475             $Block['element']['text']['text'] .= "\n";
476 
477             unset($Block['interrupted']);
478         }
479 
480         if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']))
481         {
482             $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1);
483 
484             $Block['complete'] = true;
485 
486             return $Block;
487         }
488 
489         $Block['element']['text']['text'] .= "\n".$Line['body'];
490 
491         return $Block;
492     }
493 
494     protected function blockFencedCodeComplete($Block)
495     {
496         $text = $Block['element']['text']['text'];
497 
498         $Block['element']['text']['text'] = $text;
499 
500         return $Block;
501     }
502 
503     #
504     # Header
505 
506     protected function blockHeader($Line)
507     {
508         if (isset($Line['text'][1]))
509         {
510             $level = 1;
511 
512             while (isset($Line['text'][$level]) and $Line['text'][$level] === '#')
513             {
514                 $level ++;
515             }
516 
517             if ($level > 6)
518             {
519                 return;
520             }
521 
522             $text = trim($Line['text'], '# ');
523 
524             $Block = array(
525                 'element' => array(
526                     'name' => 'h' . min(6, $level),
527                     'text' => $text,
528                     'handler' => 'line',
529                 ),
530             );
531 
532             return $Block;
533         }
534     }
535 
536     #
537     # List
538 
    /**
     * Starts a list block from a line beginning with a bullet ("*", "+", "-")
     * or a number followed by ".".
     *
     * The returned block keeps the list's indentation and marker pattern so
     * blockListContinue() can recognise sibling items, and keeps the item
     * being filled in `$Block['li']`.
     *
     * @param array $Line line data ('body', 'indent', 'text')
     * @return array|null list block, or null when the marker is not followed
     *                    by at least one space
     */
    protected function blockList($Line)
    {
        # the bullet characters all sort at or below "-" in byte order,
        # while digits sort above it
        list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]');

        if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches))
        {
            $Block = array(
                'indent' => $Line['indent'],
                'pattern' => $pattern,
                'element' => array(
                    'name' => $name,
                    'handler' => 'elements',
                ),
            );

            if($name === 'ol')
            {
                # the digits before the first "." are the number as typed
                $listStart = stristr($matches[0], '.', true);

                # a start attribute is only needed when the list does not begin at "1"
                if($listStart !== '1')
                {
                    $Block['element']['attributes'] = array('start' => $listStart);
                }
            }

            # the item's text is collected as a list of lines
            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $matches[2],
                ),
            );

            # stored by reference: later writes to $Block['li'] also update the
            # item inside the element tree
            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }
    }
577 
    /**
     * Feeds a line to an open list block.
     *
     * Depending on the line, this either starts a sibling item, swallows a
     * reference definition, appends text to the current item, or ends the
     * list.
     *
     * @param array $Line  line data ('body', 'indent', 'text')
     * @param array $Block the list block being continued
     * @return array|null updated block, or null when the line ends the list
     */
    protected function blockListContinue($Line, array $Block)
    {
        # same indentation and a matching marker: a new sibling item
        if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches))
        {
            # a blank line between two items marks the list as loose
            if (isset($Block['interrupted']))
            {
                $Block['li']['text'] []= '';

                $Block['loose'] = true;

                unset($Block['interrupted']);
            }

            # break the reference to the previous item first, so the
            # assignment below creates a new item instead of overwriting it
            unset($Block['li']);

            # a marker with nothing after it gives an empty item
            $text = isset($matches[1]) ? $matches[1] : '';

            $Block['li'] = array(
                'name' => 'li',
                'handler' => 'li',
                'text' => array(
                    $text,
                ),
            );

            $Block['element']['text'] []= & $Block['li'];

            return $Block;
        }

        # a reference definition is recorded by blockReference() as a side
        # effect and adds nothing to the list
        if ($Line['text'][0] === '[' and $this->blockReference($Line))
        {
            return $Block;
        }

        # no blank line before this line: it continues the current item,
        # with up to four leading spaces removed
        if ( ! isset($Block['interrupted']))
        {
            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            return $Block;
        }

        # after a blank line only an indented line still belongs to the item;
        # the blank line is kept as an empty entry
        if ($Line['indent'] > 0)
        {
            $Block['li']['text'] []= '';

            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);

            $Block['li']['text'] []= $text;

            unset($Block['interrupted']);

            return $Block;
        }

        # otherwise nothing is returned and the list ends
    }
635 
    /**
     * Completion hook for list blocks.
     *
     * In a loose list (one where blockListContinue() saw a blank line between
     * items) every item is made to end with an empty line.
     * NOTE(review): presumably so the 'li' handler treats all items of a
     * loose list alike — that handler is outside this section; confirm there.
     *
     * @param array $Block the finished list block
     * @return array the updated block
     */
    protected function blockListComplete(array $Block)
    {
        if (isset($Block['loose']))
        {
            # by reference, so the items inside the element tree are modified
            foreach ($Block['element']['text'] as &$li)
            {
                if (end($li['text']) !== '')
                {
                    $li['text'] []= '';
                }
            }
        }

        return $Block;
    }
651 
652     #
653     # Quote
654 
655     protected function blockQuote($Line)
656     {
657         if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
658         {
659             $Block = array(
660                 'element' => array(
661                     'name' => 'blockquote',
662                     'handler' => 'lines',
663                     'text' => (array) $matches[1],
664                 ),
665             );
666 
667             return $Block;
668         }
669     }
670 
671     protected function blockQuoteContinue($Line, array $Block)
672     {
673         if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
674         {
675             if (isset($Block['interrupted']))
676             {
677                 $Block['element']['text'] []= '';
678 
679                 unset($Block['interrupted']);
680             }
681 
682             $Block['element']['text'] []= $matches[1];
683 
684             return $Block;
685         }
686 
687         if ( ! isset($Block['interrupted']))
688         {
689             $Block['element']['text'] []= $Line['text'];
690 
691             return $Block;
692         }
693     }
694 
695     #
696     # Rule
697 
698     protected function blockRule($Line)
699     {
700         if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text']))
701         {
702             $Block = array(
703                 'element' => array(
704                     'name' => 'hr'
705                 ),
706             );
707 
708             return $Block;
709         }
710     }
711 
712     #
713     # Setext
714 
715     protected function blockSetextHeader($Line, array $Block = null)
716     {
717         if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
718         {
719             return;
720         }
721 
722         if (chop($Line['text'], $Line['text'][0]) === '')
723         {
724             $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';
725 
726             return $Block;
727         }
728     }
729 
730     #
731     # Markup
732 
733     protected function blockMarkup($Line)
734     {
735         if ($this->markupEscaped or $this->safeMode)
736         {
737             return;
738         }
739 
740         if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
741         {
742             $element = strtolower($matches[1]);
743 
744             if (in_array($element, $this->textLevelElements))
745             {
746                 return;
747             }
748 
749             $Block = array(
750                 'name' => $matches[1],
751                 'depth' => 0,
752                 'markup' => $Line['text'],
753             );
754 
755             $length = strlen($matches[0]);
756 
757             $remainder = substr($Line['text'], $length);
758 
759             if (trim($remainder) === '')
760             {
761                 if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
762                 {
763                     $Block['closed'] = true;
764 
765                     $Block['void'] = true;
766                 }
767             }
768             else
769             {
770                 if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
771                 {
772                     return;
773                 }
774 
775                 if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
776                 {
777                     $Block['closed'] = true;
778                 }
779             }
780 
781             return $Block;
782         }
783     }
784 
785     protected function blockMarkupContinue($Line, array $Block)
786     {
787         if (isset($Block['closed']))
788         {
789             return;
790         }
791 
792         if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
793         {
794             $Block['depth'] ++;
795         }
796 
797         if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
798         {
799             if ($Block['depth'] > 0)
800             {
801                 $Block['depth'] --;
802             }
803             else
804             {
805                 $Block['closed'] = true;
806             }
807         }
808 
809         if (isset($Block['interrupted']))
810         {
811             $Block['markup'] .= "\n";
812 
813             unset($Block['interrupted']);
814         }
815 
816         $Block['markup'] .= "\n".$Line['body'];
817 
818         return $Block;
819     }
820 
821     #
822     # Reference
823 
824     protected function blockReference($Line)
825     {
826         if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
827         {
828             $id = strtolower($matches[1]);
829 
830             $Data = array(
831                 'url' => $matches[2],
832                 'title' => null,
833             );
834 
835             if (isset($matches[3]))
836             {
837                 $Data['title'] = $matches[3];
838             }
839 
840             $this->DefinitionData['Reference'][$id] = $Data;
841 
842             $Block = array(
843                 'hidden' => true,
844             );
845 
846             return $Block;
847         }
848     }
849 
850     #
851     # Table
852 
853     protected function blockTable($Line, array $Block = null)
854     {
855         if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
856         {
857             return;
858         }
859 
860         if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '')
861         {
862             $alignments = array();
863 
864             $divider = $Line['text'];
865 
866             $divider = trim($divider);
867             $divider = trim($divider, '|');
868 
869             $dividerCells = explode('|', $divider);
870 
871             foreach ($dividerCells as $dividerCell)
872             {
873                 $dividerCell = trim($dividerCell);
874 
875                 if ($dividerCell === '')
876                 {
877                     continue;
878                 }
879 
880                 $alignment = null;
881 
882                 if ($dividerCell[0] === ':')
883                 {
884                     $alignment = 'left';
885                 }
886 
887                 if (substr($dividerCell, - 1) === ':')
888                 {
889                     $alignment = $alignment === 'left' ? 'center' : 'right';
890                 }
891 
892                 $alignments []= $alignment;
893             }
894 
895             # ~
896 
897             $HeaderElements = array();
898 
899             $header = $Block['element']['text'];
900 
901             $header = trim($header);
902             $header = trim($header, '|');
903 
904             $headerCells = explode('|', $header);
905 
906             foreach ($headerCells as $index => $headerCell)
907             {
908                 $headerCell = trim($headerCell);
909 
910                 $HeaderElement = array(
911                     'name' => 'th',
912                     'text' => $headerCell,
913                     'handler' => 'line',
914                 );
915 
916                 if (isset($alignments[$index]))
917                 {
918                     $alignment = $alignments[$index];
919 
920                     $HeaderElement['attributes'] = array(
921                         'style' => 'text-align: '.$alignment.';',
922                     );
923                 }
924 
925                 $HeaderElements []= $HeaderElement;
926             }
927 
928             # ~
929 
930             $Block = array(
931                 'alignments' => $alignments,
932                 'identified' => true,
933                 'element' => array(
934                     'name' => 'table',
935                     'handler' => 'elements',
936                 ),
937             );
938 
939             $Block['element']['text'] []= array(
940                 'name' => 'thead',
941                 'handler' => 'elements',
942             );
943 
944             $Block['element']['text'] []= array(
945                 'name' => 'tbody',
946                 'handler' => 'elements',
947                 'text' => array(),
948             );
949 
950             $Block['element']['text'][0]['text'] []= array(
951                 'name' => 'tr',
952                 'handler' => 'elements',
953                 'text' => $HeaderElements,
954             );
955 
956             return $Block;
957         }
958     }
959 
960     protected function blockTableContinue($Line, array $Block)
961     {
962         if (isset($Block['interrupted']))
963         {
964             return;
965         }
966 
967         if ($Line['text'][0] === '|' or strpos($Line['text'], '|'))
968         {
969             $Elements = array();
970 
971             $row = $Line['text'];
972 
973             $row = trim($row);
974             $row = trim($row, '|');
975 
976             preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches);
977 
978             foreach ($matches[0] as $index => $cell)
979             {
980                 $cell = trim($cell);
981 
982                 $Element = array(
983                     'name' => 'td',
984                     'handler' => 'line',
985                     'text' => $cell,
986                 );
987 
988                 if (isset($Block['alignments'][$index]))
989                 {
990                     $Element['attributes'] = array(
991                         'style' => 'text-align: '.$Block['alignments'][$index].';',
992                     );
993                 }
994 
995                 $Elements []= $Element;
996             }
997 
998             $Element = array(
999                 'name' => 'tr',
1000                 'handler' => 'elements',
1001                 'text' => $Elements,
1002             );
1003 
1004             $Block['element']['text'][1]['text'] []= $Element;
1005 
1006             return $Block;
1007         }
1008     }
1009 
1010     #
1011     # ~
1012     #
1013 
1014     protected function paragraph($Line)
1015     {
1016         $Block = array(
1017             'element' => array(
1018                 'name' => 'p',
1019                 'text' => $Line['text'],
1020                 'handler' => 'line',
1021             ),
1022         );
1023 
1024         return $Block;
1025     }
1026 
1027     #
1028     # Inline Elements
1029     #
1030 
    # Maps a marker character to the inline types tried when line() finds that
    # character. line() calls `inline<Type>()` for each entry in the order
    # listed and uses the first usable result, so the order within an entry
    # decides precedence.
    protected $InlineTypes = array(
        '"' => array('SpecialCharacter'),
        '!' => array('Image'),
        '&' => array('SpecialCharacter'),
        '*' => array('Emphasis'),
        ':' => array('Url'),
        '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'),
        '>' => array('SpecialCharacter'),
        '[' => array('Link'),
        '_' => array('Emphasis'),
        '`' => array('Code'),
        '~' => array('Strikethrough'),
        '\\' => array('EscapeSequence'),
    );
1045 
1046     # ~
1047 
    # The characters line() scans for with strpbrk(). line() looks every
    # character found up in $InlineTypes, so each character listed here needs
    # an entry there.
    protected $inlineMarkerList = '!"*_&[:<>`~\\';
1049 
1050     #
1051     # ~
1052     #
1053 
    /**
     * Renders the inline elements of a piece of text.
     *
     * The text is scanned for the next marker character. Each inline type
     * registered for that marker gets the chance to recognise an element
     * starting there; text not claimed by any inline is passed through
     * unmarkedText().
     *
     * An inline handler returns null or an array with:
     *  - 'extent'   number of bytes of text the inline consumes;
     *  - 'position' (optional) offset in the remaining text where the inline
     *               starts, defaulting to the marker's position;
     *  - 'element' or 'markup' what to emit.
     *
     * @param string $text          text to render
     * @param array  $nonNestables  inline type names that must not be
     *                              produced in this context
     * @return string markup
     */
    public function line($text, $nonNestables=array())
    {
        $markup = '';

        # $excerpt is based on the first occurrence of a marker

        while ($excerpt = strpbrk($text, $this->inlineMarkerList))
        {
            $marker = $excerpt[0];

            $markerPosition = strpos($text, $marker);

            # 'text' starts at the marker, 'context' is all the remaining text
            $Excerpt = array('text' => $excerpt, 'context' => $text);

            foreach ($this->InlineTypes[$marker] as $inlineType)
            {
                # check to see if the current inline type is nestable in the current context

                if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables))
                {
                    continue;
                }

                $Inline = $this->{'inline'.$inlineType}($Excerpt);

                if ( ! isset($Inline))
                {
                    continue;
                }

                # makes sure that the inline belongs to "our" marker

                if (isset($Inline['position']) and $Inline['position'] > $markerPosition)
                {
                    continue;
                }

                # sets a default inline position

                if ( ! isset($Inline['position']))
                {
                    $Inline['position'] = $markerPosition;
                }

                # cause the new element to 'inherit' our non nestables

                foreach ($nonNestables as $non_nestable)
                {
                    $Inline['element']['nonNestables'][] = $non_nestable;
                }

                # the text that comes before the inline
                $unmarkedText = substr($text, 0, $Inline['position']);

                # compile the unmarked text
                $markup .= $this->unmarkedText($unmarkedText);

                # compile the inline
                $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']);

                # remove the examined text
                $text = substr($text, $Inline['position'] + $Inline['extent']);

                # rescan what is left, starting with the outer while loop
                continue 2;
            }

            # the marker does not belong to an inline: emit everything up to
            # and including it as plain text and go on after it

            $unmarkedText = substr($text, 0, $markerPosition + 1);

            $markup .= $this->unmarkedText($unmarkedText);

            $text = substr($text, $markerPosition + 1);
        }

        # no markers left: the rest is plain text

        $markup .= $this->unmarkedText($text);

        return $markup;
    }
1133 
1134     #
1135     # ~
1136     #
1137 
1138     protected function inlineCode($Excerpt)
1139     {
1140         $marker = $Excerpt['text'][0];
1141 
1142         if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
1143         {
1144             $text = $matches[2];
1145             $text = preg_replace("/[ ]*\n/", ' ', $text);
1146 
1147             return array(
1148                 'extent' => strlen($matches[0]),
1149                 'element' => array(
1150                     'name' => 'code',
1151                     'text' => $text,
1152                 ),
1153             );
1154         }
1155     }
1156 
1157     protected function inlineEmailTag($Excerpt)
1158     {
1159         if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches))
1160         {
1161             $url = $matches[1];
1162 
1163             if ( ! isset($matches[2]))
1164             {
1165                 $url = 'mailto:' . $url;
1166             }
1167 
1168             return array(
1169                 'extent' => strlen($matches[0]),
1170                 'element' => array(
1171                     'name' => 'a',
1172                     'text' => $matches[1],
1173                     'attributes' => array(
1174                         'href' => $url,
1175                     ),
1176                 ),
1177             );
1178         }
1179     }
1180 
1181     protected function inlineEmphasis($Excerpt)
1182     {
1183         if ( ! isset($Excerpt['text'][1]))
1184         {
1185             return;
1186         }
1187 
1188         $marker = $Excerpt['text'][0];
1189 
1190         if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches))
1191         {
1192             $emphasis = 'strong';
1193         }
1194         elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches))
1195         {
1196             $emphasis = 'em';
1197         }
1198         else
1199         {
1200             return;
1201         }
1202 
1203         return array(
1204             'extent' => strlen($matches[0]),
1205             'element' => array(
1206                 'name' => $emphasis,
1207                 'handler' => 'line',
1208                 'text' => $matches[1],
1209             ),
1210         );
1211     }
1212 
1213     protected function inlineEscapeSequence($Excerpt)
1214     {
1215         if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters))
1216         {
1217             return array(
1218                 'markup' => $Excerpt['text'][1],
1219                 'extent' => 2,
1220             );
1221         }
1222     }
1223 
1224     protected function inlineImage($Excerpt)
1225     {
1226         if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[')
1227         {
1228             return;
1229         }
1230 
1231         $Excerpt['text']= substr($Excerpt['text'], 1);
1232 
1233         $Link = $this->inlineLink($Excerpt);
1234 
1235         if ($Link === null)
1236         {
1237             return;
1238         }
1239 
1240         $Inline = array(
1241             'extent' => $Link['extent'] + 1,
1242             'element' => array(
1243                 'name' => 'img',
1244                 'attributes' => array(
1245                     'src' => $Link['element']['attributes']['href'],
1246                     'alt' => $Link['element']['text'],
1247                 ),
1248             ),
1249         );
1250 
1251         $Inline['element']['attributes'] += $Link['element']['attributes'];
1252 
1253         unset($Inline['element']['attributes']['href']);
1254 
1255         return $Inline;
1256     }
1257 
1258     protected function inlineLink($Excerpt)
1259     {
1260         $Element = array(
1261             'name' => 'a',
1262             'handler' => 'line',
1263             'nonNestables' => array('Url', 'Link'),
1264             'text' => null,
1265             'attributes' => array(
1266                 'href' => null,
1267                 'title' => null,
1268             ),
1269         );
1270 
1271         $extent = 0;
1272 
1273         $remainder = $Excerpt['text'];
1274 
1275         if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches))
1276         {
1277             $Element['text'] = $matches[1];
1278 
1279             $extent += strlen($matches[0]);
1280 
1281             $remainder = substr($remainder, $extent);
1282         }
1283         else
1284         {
1285             return;
1286         }
1287 
1288         if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches))
1289         {
1290             $Element['attributes']['href'] = $matches[1];
1291 
1292             if (isset($matches[2]))
1293             {
1294                 $Element['attributes']['title'] = substr($matches[2], 1, - 1);
1295             }
1296 
1297             $extent += strlen($matches[0]);
1298         }
1299         else
1300         {
1301             if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
1302             {
1303                 $definition = strlen($matches[1]) ? $matches[1] : $Element['text'];
1304                 $definition = strtolower($definition);
1305 
1306                 $extent += strlen($matches[0]);
1307             }
1308             else
1309             {
1310                 $definition = strtolower($Element['text']);
1311             }
1312 
1313             if ( ! isset($this->DefinitionData['Reference'][$definition]))
1314             {
1315                 return;
1316             }
1317 
1318             $Definition = $this->DefinitionData['Reference'][$definition];
1319 
1320             $Element['attributes']['href'] = $Definition['url'];
1321             $Element['attributes']['title'] = $Definition['title'];
1322         }
1323 
1324         return array(
1325             'extent' => $extent,
1326             'element' => $Element,
1327         );
1328     }
1329 
1330     protected function inlineMarkup($Excerpt)
1331     {
1332         if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false)
1333         {
1334             return;
1335         }
1336 
1337         if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches))
1338         {
1339             return array(
1340                 'markup' => $matches[0],
1341                 'extent' => strlen($matches[0]),
1342             );
1343         }
1344 
1345         if ($Excerpt['text'][1] === '!' and preg_match('/^<!---?[^>-](?:-?[^-])*-->/s', $Excerpt['text'], $matches))
1346         {
1347             return array(
1348                 'markup' => $matches[0],
1349                 'extent' => strlen($matches[0]),
1350             );
1351         }
1352 
1353         if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches))
1354         {
1355             return array(
1356                 'markup' => $matches[0],
1357                 'extent' => strlen($matches[0]),
1358             );
1359         }
1360     }
1361 
1362     protected function inlineSpecialCharacter($Excerpt)
1363     {
1364         if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text']))
1365         {
1366             return array(
1367                 'markup' => '&amp;',
1368                 'extent' => 1,
1369             );
1370         }
1371 
1372         $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot');
1373 
1374         if (isset($SpecialCharacter[$Excerpt['text'][0]]))
1375         {
1376             return array(
1377                 'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';',
1378                 'extent' => 1,
1379             );
1380         }
1381     }
1382 
1383     protected function inlineStrikethrough($Excerpt)
1384     {
1385         if ( ! isset($Excerpt['text'][1]))
1386         {
1387             return;
1388         }
1389 
1390         if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches))
1391         {
1392             return array(
1393                 'extent' => strlen($matches[0]),
1394                 'element' => array(
1395                     'name' => 'del',
1396                     'text' => $matches[1],
1397                     'handler' => 'line',
1398                 ),
1399             );
1400         }
1401     }
1402 
1403     protected function inlineUrl($Excerpt)
1404     {
1405         if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
1406         {
1407             return;
1408         }
1409 
1410         if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
1411         {
1412             $url = $matches[0][0];
1413 
1414             $Inline = array(
1415                 'extent' => strlen($matches[0][0]),
1416                 'position' => $matches[0][1],
1417                 'element' => array(
1418                     'name' => 'a',
1419                     'text' => $url,
1420                     'attributes' => array(
1421                         'href' => $url,
1422                     ),
1423                 ),
1424             );
1425 
1426             return $Inline;
1427         }
1428     }
1429 
1430     protected function inlineUrlTag($Excerpt)
1431     {
1432         if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
1433         {
1434             $url = $matches[1];
1435 
1436             return array(
1437                 'extent' => strlen($matches[0]),
1438                 'element' => array(
1439                     'name' => 'a',
1440                     'text' => $url,
1441                     'attributes' => array(
1442                         'href' => $url,
1443                     ),
1444                 ),
1445             );
1446         }
1447     }
1448 
1449     # ~
1450 
1451     protected function unmarkedText($text)
1452     {
1453         if ($this->breaksEnabled)
1454         {
1455             $text = preg_replace('/[ ]*\n/', "<br />\n", $text);
1456         }
1457         else
1458         {
1459             $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);
1460             $text = str_replace(" \n", "\n", $text);
1461         }
1462 
1463         return $text;
1464     }
1465 
1466     #
1467     # Handlers
1468     #
1469 
1470     protected function element(array $Element)
1471     {
1472         if ($this->safeMode)
1473         {
1474             $Element = $this->sanitiseElement($Element);
1475         }
1476 
1477         $markup = '<'.$Element['name'];
1478 
1479         if (isset($Element['attributes']))
1480         {
1481             foreach ($Element['attributes'] as $name => $value)
1482             {
1483                 if ($value === null)
1484                 {
1485                     continue;
1486                 }
1487 
1488                 $markup .= ' '.$name.'="'.self::escape($value).'"';
1489             }
1490         }
1491 
1492         $permitRawHtml = false;
1493 
1494         if (isset($Element['text']))
1495         {
1496             $text = $Element['text'];
1497         }
1498         // very strongly consider an alternative if you're writing an
1499         // extension
1500         elseif (isset($Element['rawHtml']))
1501         {
1502             $text = $Element['rawHtml'];
1503             $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode'];
1504             $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode;
1505         }
1506 
1507         if (isset($text))
1508         {
1509             $markup .= '>';
1510 
1511             if (!isset($Element['nonNestables']))
1512             {
1513                 $Element['nonNestables'] = array();
1514             }
1515 
1516             if (isset($Element['handler']))
1517             {
1518                 $markup .= $this->{$Element['handler']}($text, $Element['nonNestables']);
1519             }
1520             elseif (!$permitRawHtml)
1521             {
1522                 $markup .= self::escape($text, true);
1523             }
1524             else
1525             {
1526                 $markup .= $text;
1527             }
1528 
1529             $markup .= '</'.$Element['name'].'>';
1530         }
1531         else
1532         {
1533             $markup .= ' />';
1534         }
1535 
1536         return $markup;
1537     }
1538 
1539     protected function elements(array $Elements)
1540     {
1541         $markup = '';
1542 
1543         foreach ($Elements as $Element)
1544         {
1545             $markup .= "\n" . $this->element($Element);
1546         }
1547 
1548         $markup .= "\n";
1549 
1550         return $markup;
1551     }
1552 
1553     # ~
1554 
1555     protected function li($lines)
1556     {
1557         $markup = $this->lines($lines);
1558 
1559         $trimmedMarkup = trim($markup);
1560 
1561         if ( ! in_array('', $lines) and substr($trimmedMarkup, 0, 3) === '<p>')
1562         {
1563             $markup = $trimmedMarkup;
1564             $markup = substr($markup, 3);
1565 
1566             $position = strpos($markup, "</p>");
1567 
1568             $markup = substr_replace($markup, '', $position, 4);
1569         }
1570 
1571         return $markup;
1572     }
1573 
1574     #
1575     # Deprecated Methods
1576     #
1577 
1578     function parse($text)
1579     {
1580         $markup = $this->text($text);
1581 
1582         return $markup;
1583     }
1584 
1585     protected function sanitiseElement(array $Element)
1586     {
1587         static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
1588         static $safeUrlNameToAtt  = array(
1589             'a'   => 'href',
1590             'img' => 'src',
1591         );
1592 
1593         if (isset($safeUrlNameToAtt[$Element['name']]))
1594         {
1595             $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]);
1596         }
1597 
1598         if ( ! empty($Element['attributes']))
1599         {
1600             foreach ($Element['attributes'] as $att => $val)
1601             {
1602                 # filter out badly parsed attribute
1603                 if ( ! preg_match($goodAttribute, $att))
1604                 {
1605                     unset($Element['attributes'][$att]);
1606                 }
1607                 # dump onevent attribute
1608                 elseif (self::striAtStart($att, 'on'))
1609                 {
1610                     unset($Element['attributes'][$att]);
1611                 }
1612             }
1613         }
1614 
1615         return $Element;
1616     }
1617 
1618     protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
1619     {
1620         foreach ($this->safeLinksWhitelist as $scheme)
1621         {
1622             if (self::striAtStart($Element['attributes'][$attribute], $scheme))
1623             {
1624                 return $Element;
1625             }
1626         }
1627 
1628         $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]);
1629 
1630         return $Element;
1631     }
1632 
1633     #
1634     # Static Methods
1635     #
1636 
1637     protected static function escape($text, $allowQuotes = false)
1638     {
1639         return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8');
1640     }
1641 
1642     protected static function striAtStart($string, $needle)
1643     {
1644         $len = strlen($needle);
1645 
1646         if ($len > strlen($string))
1647         {
1648             return false;
1649         }
1650         else
1651         {
1652             return strtolower(substr($string, 0, $len)) === strtolower($needle);
1653         }
1654     }
1655 
1656     static function instance($name = 'default')
1657     {
1658         if (isset(self::$instances[$name]))
1659         {
1660             return self::$instances[$name];
1661         }
1662 
1663         $instance = new static();
1664 
1665         self::$instances[$name] = $instance;
1666 
1667         return $instance;
1668     }
1669 
    # instances handed out by instance(), keyed by the name they were requested under
    private static $instances = array();
1671 
1672     #
1673     # Fields
1674     #
1675 
    # Definitions collected for the document being parsed. text() resets this
    # to an empty array before parsing; inlineLink() resolves reference links
    # through $DefinitionData['Reference'][<lowercased id>], reading the 'url'
    # and 'title' of the entry it finds.
    protected $DefinitionData;
1677 
1678     #
1679     # Read-Only
1680 
    # Characters that can be escaped: inlineEscapeSequence() emits the second
    # character of its excerpt verbatim only when it appears in this list.
    protected $specialCharacters = array(
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|',
    );
1684 
    # Patterns for strong emphasis, keyed by marker character. inlineEmphasis()
    # tries the entry for the marker when the marker is doubled. Each pattern
    # is anchored at the start, expects two markers on either side, refuses a
    # further marker right after the closing pair, and captures the inner text
    # in group 1.
    protected $StrongRegex = array(
        '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
        '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
    );
1689 
    # Patterns for regular emphasis, keyed by marker character. inlineEmphasis()
    # falls back to the entry for the marker when strong emphasis does not
    # apply. Each pattern is anchored at the start, expects one marker on
    # either side, refuses a further marker right after the closing one, and
    # captures the inner text in group 1. The `_` variant additionally
    # requires a word boundary after the closing underscore.
    protected $EmRegex = array(
        '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
        '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
    );
1694 
    # Regex fragment (no delimiters) for a single HTML attribute: a name,
    # optionally followed by `=` and an unquoted, double-quoted or
    # single-quoted value. inlineMarkup() splices it into its opening-tag
    # pattern.
    protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?';
1696 
    # Names of HTML elements that have no closing tag.
    # NOTE(review): not referenced in this part of the file; presumably
    # consulted by the block-level HTML handling - confirm against its users.
    protected $voidElements = array(
        'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
    );
1700 
    # Names of text-level (inline) HTML elements, laid out in columns by name
    # length.
    # NOTE(review): not referenced in this part of the file; presumably used
    # to tell inline tags from block-level ones - confirm against its users.
    protected $textLevelElements = array(
        'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
        'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
        'i', 'rp', 'del', 'code',          'strike', 'marquee',
        'q', 'rt', 'ins', 'font',          'strong',
        's', 'tt', 'kbd', 'mark',
        'u', 'xm', 'sub', 'nobr',
                   'sup', 'ruby',
                   'var', 'span',
                   'wbr', 'time',
    );
1712 }
1713