1<?php
2
3#
4#
5# Parsedown
6# http://parsedown.org
7#
8# (c) Emanuil Rusev
9# http://erusev.com
10#
11# For the full license information, view the LICENSE file that was distributed
12# with this source code.
13#
14#
15
16class Parsedown
17{
18    # ~
19
20    const version = '1.7.4';
21
22    # ~
23
24    function text($text)
25    {
26        # make sure no definitions are set
27        $this->DefinitionData = array();
28
29        # standardize line breaks
30        $text = str_replace(array("\r\n", "\r"), "\n", $text);
31
32        # remove surrounding line breaks
33        $text = trim($text, "\n");
34
35        # split text into lines
36        $lines = explode("\n", $text);
37
38        # iterate through lines to identify blocks
39        $markup = $this->lines($lines);
40
41        # trim line breaks
42        $markup = trim($markup, "\n");
43
44        return $markup;
45    }
46
47    #
48    # Setters
49    #
50
51    function setBreaksEnabled($breaksEnabled)
52    {
53        $this->breaksEnabled = $breaksEnabled;
54
55        return $this;
56    }
57
58    protected $breaksEnabled;
59
60    function setMarkupEscaped($markupEscaped)
61    {
62        $this->markupEscaped = $markupEscaped;
63
64        return $this;
65    }
66
67    protected $markupEscaped;
68
69    function setUrlsLinked($urlsLinked)
70    {
71        $this->urlsLinked = $urlsLinked;
72
73        return $this;
74    }
75
76    protected $urlsLinked = true;
77
78    function setSafeMode($safeMode)
79    {
80        $this->safeMode = (bool) $safeMode;
81
82        return $this;
83    }
84
85    protected $safeMode;
86
    # URL prefixes regarded as safe: plain schemes, plus base64 data: URIs
    # restricted to three image media types.
    # NOTE(review): the code that consults this list is outside this excerpt;
    # presumably it is only applied while $safeMode is on - confirm there.
    protected $safeLinksWhitelist = array(
        'http://',
        'https://',
        'ftp://',
        'ftps://',
        'mailto:',
        'data:image/png;base64,',
        'data:image/gif;base64,',
        'data:image/jpeg;base64,',
        'irc:',
        'ircs:',
        'git:',
        'ssh:',
        'news:',
        'steam:',
    );
103
104    #
105    # Lines
106    #
107
    # Maps the first character of a line's text (after its indent) to the block
    # type names worth trying for that line. lines() calls the method named
    # 'block'.$type for each entry, in the order listed, and keeps the first
    # non-null result - so the order inside each list is significant.
    protected $BlockTypes = array(
        '#' => array('Header'),
        '*' => array('Rule', 'List'),
        '+' => array('List'),
        '-' => array('SetextHeader', 'Table', 'Rule', 'List'),
        # every digit can start an ordered list item
        '0' => array('List'),
        '1' => array('List'),
        '2' => array('List'),
        '3' => array('List'),
        '4' => array('List'),
        '5' => array('List'),
        '6' => array('List'),
        '7' => array('List'),
        '8' => array('List'),
        '9' => array('List'),
        ':' => array('Table'),
        '<' => array('Comment', 'Markup'),
        '=' => array('SetextHeader'),
        '>' => array('Quote'),
        '[' => array('Reference'),
        '_' => array('Rule'),
        '`' => array('FencedCode'),
        '|' => array('Table'),
        '~' => array('FencedCode'),
    );
133
134    # ~
135
    # Block types tried for every line regardless of its first character;
    # lines() tries them before the marker-specific types from $BlockTypes.
    protected $unmarkedBlockTypes = array(
        'Code',
    );
139
140    #
141    # Blocks
142    #
143
144    protected function lines(array $lines)
145    {
146        $CurrentBlock = null;
147
148        foreach ($lines as $line)
149        {
150            if (chop($line) === '')
151            {
152                if (isset($CurrentBlock))
153                {
154                    $CurrentBlock['interrupted'] = true;
155                }
156
157                continue;
158            }
159
160            if (strpos($line, "\t") !== false)
161            {
162                $parts = explode("\t", $line);
163
164                $line = $parts[0];
165
166                unset($parts[0]);
167
168                foreach ($parts as $part)
169                {
170                    $shortage = 4 - mb_strlen($line, 'utf-8') % 4;
171
172                    $line .= str_repeat(' ', $shortage);
173                    $line .= $part;
174                }
175            }
176
177            $indent = 0;
178
179            while (isset($line[$indent]) and $line[$indent] === ' ')
180            {
181                $indent ++;
182            }
183
184            $text = $indent > 0 ? substr($line, $indent) : $line;
185
186            # ~
187
188            $Line = array('body' => $line, 'indent' => $indent, 'text' => $text);
189
190            # ~
191
192            if (isset($CurrentBlock['continuable']))
193            {
194                $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock);
195
196                if (isset($Block))
197                {
198                    $CurrentBlock = $Block;
199
200                    continue;
201                }
202                else
203                {
204                    if ($this->isBlockCompletable($CurrentBlock['type']))
205                    {
206                        $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
207                    }
208                }
209            }
210
211            # ~
212
213            $marker = $text[0];
214
215            # ~
216
217            $blockTypes = $this->unmarkedBlockTypes;
218
219            if (isset($this->BlockTypes[$marker]))
220            {
221                foreach ($this->BlockTypes[$marker] as $blockType)
222                {
223                    $blockTypes []= $blockType;
224                }
225            }
226
227            #
228            # ~
229
230            foreach ($blockTypes as $blockType)
231            {
232                $Block = $this->{'block'.$blockType}($Line, $CurrentBlock);
233
234                if (isset($Block))
235                {
236                    $Block['type'] = $blockType;
237
238                    if ( ! isset($Block['identified']))
239                    {
240                        $Blocks []= $CurrentBlock;
241
242                        $Block['identified'] = true;
243                    }
244
245                    if ($this->isBlockContinuable($blockType))
246                    {
247                        $Block['continuable'] = true;
248                    }
249
250                    $CurrentBlock = $Block;
251
252                    continue 2;
253                }
254            }
255
256            # ~
257
258            if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted']))
259            {
260                $CurrentBlock['element']['text'] .= "\n".$text;
261            }
262            else
263            {
264                $Blocks []= $CurrentBlock;
265
266                $CurrentBlock = $this->paragraph($Line);
267
268                $CurrentBlock['identified'] = true;
269            }
270        }
271
272        # ~
273
274        if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type']))
275        {
276            $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock);
277        }
278
279        # ~
280
281        $Blocks []= $CurrentBlock;
282
283        unset($Blocks[0]);
284
285        # ~
286
287        $markup = '';
288
289        foreach ($Blocks as $Block)
290        {
291            if (isset($Block['hidden']))
292            {
293                continue;
294            }
295
296            $markup .= "\n";
297            $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']);
298        }
299
300        $markup .= "\n";
301
302        # ~
303
304        return $markup;
305    }
306
307    protected function isBlockContinuable($Type)
308    {
309        return method_exists($this, 'block'.$Type.'Continue');
310    }
311
312    protected function isBlockCompletable($Type)
313    {
314        return method_exists($this, 'block'.$Type.'Complete');
315    }
316
317    #
318    # Code
319
320    protected function blockCode($Line, $Block = null)
321    {
322        if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted']))
323        {
324            return;
325        }
326
327        if ($Line['indent'] >= 4)
328        {
329            $text = substr($Line['body'], 4);
330
331            $Block = array(
332                'element' => array(
333                    'name' => 'pre',
334                    'handler' => 'element',
335                    'text' => array(
336                        'name' => 'code',
337                        'text' => $text,
338                    ),
339                ),
340            );
341
342            return $Block;
343        }
344    }
345
346    protected function blockCodeContinue($Line, $Block)
347    {
348        if ($Line['indent'] >= 4)
349        {
350            if (isset($Block['interrupted']))
351            {
352                $Block['element']['text']['text'] .= "\n";
353
354                unset($Block['interrupted']);
355            }
356
357            $Block['element']['text']['text'] .= "\n";
358
359            $text = substr($Line['body'], 4);
360
361            $Block['element']['text']['text'] .= $text;
362
363            return $Block;
364        }
365    }
366
367    protected function blockCodeComplete($Block)
368    {
369        $text = $Block['element']['text']['text'];
370
371        $Block['element']['text']['text'] = $text;
372
373        return $Block;
374    }
375
376    #
377    # Comment
378
379    protected function blockComment($Line)
380    {
381        if ($this->markupEscaped or $this->safeMode)
382        {
383            return;
384        }
385
386        if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!')
387        {
388            $Block = array(
389                'markup' => $Line['body'],
390            );
391
392            if (preg_match('/-->$/', $Line['text']))
393            {
394                $Block['closed'] = true;
395            }
396
397            return $Block;
398        }
399    }
400
401    protected function blockCommentContinue($Line, array $Block)
402    {
403        if (isset($Block['closed']))
404        {
405            return;
406        }
407
408        $Block['markup'] .= "\n" . $Line['body'];
409
410        if (preg_match('/-->$/', $Line['text']))
411        {
412            $Block['closed'] = true;
413        }
414
415        return $Block;
416    }
417
418    #
419    # Fenced Code
420
421    protected function blockFencedCode($Line)
422    {
423        if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches))
424        {
425            $Element = array(
426                'name' => 'code',
427                'text' => '',
428            );
429
430            if (isset($matches[1]))
431            {
432                /**
433                 * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
434                 * Every HTML element may have a class attribute specified.
435                 * The attribute, if specified, must have a value that is a set
436                 * of space-separated tokens representing the various classes
437                 * that the element belongs to.
438                 * [...]
439                 * The space characters, for the purposes of this specification,
440                 * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
441                 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and
442                 * U+000D CARRIAGE RETURN (CR).
443                 */
444                $language = substr($matches[1], 0, strcspn($matches[1], " \t\n\f\r"));
445
446                $class = 'language-'.$language;
447
448                $Element['attributes'] = array(
449                    'class' => $class,
450                );
451            }
452
453            $Block = array(
454                'char' => $Line['text'][0],
455                'element' => array(
456                    'name' => 'pre',
457                    'handler' => 'element',
458                    'text' => $Element,
459                ),
460            );
461
462            return $Block;
463        }
464    }
465
466    protected function blockFencedCodeContinue($Line, $Block)
467    {
468        if (isset($Block['complete']))
469        {
470            return;
471        }
472
473        if (isset($Block['interrupted']))
474        {
475            $Block['element']['text']['text'] .= "\n";
476
477            unset($Block['interrupted']);
478        }
479
480        if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text']))
481        {
482            $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1);
483
484            $Block['complete'] = true;
485
486            return $Block;
487        }
488
489        $Block['element']['text']['text'] .= "\n".$Line['body'];
490
491        return $Block;
492    }
493
494    protected function blockFencedCodeComplete($Block)
495    {
496        $text = $Block['element']['text']['text'];
497
498        $Block['element']['text']['text'] = $text;
499
500        return $Block;
501    }
502
503    #
504    # Header
505
506    protected function blockHeader($Line)
507    {
508        if (isset($Line['text'][1]))
509        {
510            $level = 1;
511
512            while (isset($Line['text'][$level]) and $Line['text'][$level] === '#')
513            {
514                $level ++;
515            }
516
517            if ($level > 6)
518            {
519                return;
520            }
521
522            $text = trim($Line['text'], '# ');
523
524            $Block = array(
525                'element' => array(
526                    'name' => 'h' . min(6, $level),
527                    'text' => $text,
528                    'handler' => 'line',
529                ),
530            );
531
532            return $Block;
533        }
534    }
535
536    #
537    # List
538
539    protected function blockList($Line)
540    {
541        list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]');
542
543        if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches))
544        {
545            $Block = array(
546                'indent' => $Line['indent'],
547                'pattern' => $pattern,
548                'element' => array(
549                    'name' => $name,
550                    'handler' => 'elements',
551                ),
552            );
553
554            if($name === 'ol')
555            {
556                $listStart = stristr($matches[0], '.', true);
557
558                if($listStart !== '1')
559                {
560                    $Block['element']['attributes'] = array('start' => $listStart);
561                }
562            }
563
564            $Block['li'] = array(
565                'name' => 'li',
566                'handler' => 'li',
567                'text' => array(
568                    $matches[2],
569                ),
570            );
571
572            $Block['element']['text'] []= & $Block['li'];
573
574            return $Block;
575        }
576    }
577
578    protected function blockListContinue($Line, array $Block)
579    {
580        if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches))
581        {
582            if (isset($Block['interrupted']))
583            {
584                $Block['li']['text'] []= '';
585
586                $Block['loose'] = true;
587
588                unset($Block['interrupted']);
589            }
590
591            unset($Block['li']);
592
593            $text = isset($matches[1]) ? $matches[1] : '';
594
595            $Block['li'] = array(
596                'name' => 'li',
597                'handler' => 'li',
598                'text' => array(
599                    $text,
600                ),
601            );
602
603            $Block['element']['text'] []= & $Block['li'];
604
605            return $Block;
606        }
607
608        if ($Line['text'][0] === '[' and $this->blockReference($Line))
609        {
610            return $Block;
611        }
612
613        if ( ! isset($Block['interrupted']))
614        {
615            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);
616
617            $Block['li']['text'] []= $text;
618
619            return $Block;
620        }
621
622        if ($Line['indent'] > 0)
623        {
624            $Block['li']['text'] []= '';
625
626            $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']);
627
628            $Block['li']['text'] []= $text;
629
630            unset($Block['interrupted']);
631
632            return $Block;
633        }
634    }
635
636    protected function blockListComplete(array $Block)
637    {
638        if (isset($Block['loose']))
639        {
640            foreach ($Block['element']['text'] as &$li)
641            {
642                if (end($li['text']) !== '')
643                {
644                    $li['text'] []= '';
645                }
646            }
647        }
648
649        return $Block;
650    }
651
652    #
653    # Quote
654
655    protected function blockQuote($Line)
656    {
657        if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
658        {
659            $Block = array(
660                'element' => array(
661                    'name' => 'blockquote',
662                    'handler' => 'lines',
663                    'text' => (array) $matches[1],
664                ),
665            );
666
667            return $Block;
668        }
669    }
670
671    protected function blockQuoteContinue($Line, array $Block)
672    {
673        if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches))
674        {
675            if (isset($Block['interrupted']))
676            {
677                $Block['element']['text'] []= '';
678
679                unset($Block['interrupted']);
680            }
681
682            $Block['element']['text'] []= $matches[1];
683
684            return $Block;
685        }
686
687        if ( ! isset($Block['interrupted']))
688        {
689            $Block['element']['text'] []= $Line['text'];
690
691            return $Block;
692        }
693    }
694
695    #
696    # Rule
697
698    protected function blockRule($Line)
699    {
700        if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text']))
701        {
702            $Block = array(
703                'element' => array(
704                    'name' => 'hr'
705                ),
706            );
707
708            return $Block;
709        }
710    }
711
712    #
713    # Setext
714
715    protected function blockSetextHeader($Line, array $Block = null)
716    {
717        if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
718        {
719            return;
720        }
721
722        if (chop($Line['text'], $Line['text'][0]) === '')
723        {
724            $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';
725
726            return $Block;
727        }
728    }
729
730    #
731    # Markup
732
733    protected function blockMarkup($Line)
734    {
735        if ($this->markupEscaped or $this->safeMode)
736        {
737            return;
738        }
739
740        if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
741        {
742            $element = strtolower($matches[1]);
743
744            if (in_array($element, $this->textLevelElements))
745            {
746                return;
747            }
748
749            $Block = array(
750                'name' => $matches[1],
751                'depth' => 0,
752                'markup' => $Line['text'],
753            );
754
755            $length = strlen($matches[0]);
756
757            $remainder = substr($Line['text'], $length);
758
759            if (trim($remainder) === '')
760            {
761                if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
762                {
763                    $Block['closed'] = true;
764
765                    $Block['void'] = true;
766                }
767            }
768            else
769            {
770                if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
771                {
772                    return;
773                }
774
775                if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
776                {
777                    $Block['closed'] = true;
778                }
779            }
780
781            return $Block;
782        }
783    }
784
785    protected function blockMarkupContinue($Line, array $Block)
786    {
787        if (isset($Block['closed']))
788        {
789            return;
790        }
791
792        if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
793        {
794            $Block['depth'] ++;
795        }
796
797        if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
798        {
799            if ($Block['depth'] > 0)
800            {
801                $Block['depth'] --;
802            }
803            else
804            {
805                $Block['closed'] = true;
806            }
807        }
808
809        if (isset($Block['interrupted']))
810        {
811            $Block['markup'] .= "\n";
812
813            unset($Block['interrupted']);
814        }
815
816        $Block['markup'] .= "\n".$Line['body'];
817
818        return $Block;
819    }
820
821    #
822    # Reference
823
824    protected function blockReference($Line)
825    {
826        if (preg_match('/^\[(.+?)\]:[ ]*<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches))
827        {
828            $id = strtolower($matches[1]);
829
830            $Data = array(
831                'url' => $matches[2],
832                'title' => null,
833            );
834
835            if (isset($matches[3]))
836            {
837                $Data['title'] = $matches[3];
838            }
839
840            $this->DefinitionData['Reference'][$id] = $Data;
841
842            $Block = array(
843                'hidden' => true,
844            );
845
846            return $Block;
847        }
848    }
849
850    #
851    # Table
852
853    protected function blockTable($Line, array $Block = null)
854    {
855        if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted']))
856        {
857            return;
858        }
859
860        if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '')
861        {
862            $alignments = array();
863
864            $divider = $Line['text'];
865
866            $divider = trim($divider);
867            $divider = trim($divider, '|');
868
869            $dividerCells = explode('|', $divider);
870
871            foreach ($dividerCells as $dividerCell)
872            {
873                $dividerCell = trim($dividerCell);
874
875                if ($dividerCell === '')
876                {
877                    continue;
878                }
879
880                $alignment = null;
881
882                if ($dividerCell[0] === ':')
883                {
884                    $alignment = 'left';
885                }
886
887                if (substr($dividerCell, - 1) === ':')
888                {
889                    $alignment = $alignment === 'left' ? 'center' : 'right';
890                }
891
892                $alignments []= $alignment;
893            }
894
895            # ~
896
897            $HeaderElements = array();
898
899            $header = $Block['element']['text'];
900
901            $header = trim($header);
902            $header = trim($header, '|');
903
904            $headerCells = explode('|', $header);
905
906            foreach ($headerCells as $index => $headerCell)
907            {
908                $headerCell = trim($headerCell);
909
910                $HeaderElement = array(
911                    'name' => 'th',
912                    'text' => $headerCell,
913                    'handler' => 'line',
914                );
915
916                if (isset($alignments[$index]))
917                {
918                    $alignment = $alignments[$index];
919
920                    $HeaderElement['attributes'] = array(
921                        'style' => 'text-align: '.$alignment.';',
922                    );
923                }
924
925                $HeaderElements []= $HeaderElement;
926            }
927
928            # ~
929
930            $Block = array(
931                'alignments' => $alignments,
932                'identified' => true,
933                'element' => array(
934                    'name' => 'table',
935                    'handler' => 'elements',
936                ),
937            );
938
939            $Block['element']['text'] []= array(
940                'name' => 'thead',
941                'handler' => 'elements',
942            );
943
944            $Block['element']['text'] []= array(
945                'name' => 'tbody',
946                'handler' => 'elements',
947                'text' => array(),
948            );
949
950            $Block['element']['text'][0]['text'] []= array(
951                'name' => 'tr',
952                'handler' => 'elements',
953                'text' => $HeaderElements,
954            );
955
956            return $Block;
957        }
958    }
959
960    protected function blockTableContinue($Line, array $Block)
961    {
962        if (isset($Block['interrupted']))
963        {
964            return;
965        }
966
967        if ($Line['text'][0] === '|' or strpos($Line['text'], '|'))
968        {
969            $Elements = array();
970
971            $row = $Line['text'];
972
973            $row = trim($row);
974            $row = trim($row, '|');
975
976            preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches);
977
978            foreach ($matches[0] as $index => $cell)
979            {
980                $cell = trim($cell);
981
982                $Element = array(
983                    'name' => 'td',
984                    'handler' => 'line',
985                    'text' => $cell,
986                );
987
988                if (isset($Block['alignments'][$index]))
989                {
990                    $Element['attributes'] = array(
991                        'style' => 'text-align: '.$Block['alignments'][$index].';',
992                    );
993                }
994
995                $Elements []= $Element;
996            }
997
998            $Element = array(
999                'name' => 'tr',
1000                'handler' => 'elements',
1001                'text' => $Elements,
1002            );
1003
1004            $Block['element']['text'][1]['text'] []= $Element;
1005
1006            return $Block;
1007        }
1008    }
1009
1010    #
1011    # ~
1012    #
1013
1014    protected function paragraph($Line)
1015    {
1016        $Block = array(
1017            'element' => array(
1018                'name' => 'p',
1019                'text' => $Line['text'],
1020                'handler' => 'line',
1021            ),
1022        );
1023
1024        return $Block;
1025    }
1026
1027    #
1028    # Inline Elements
1029    #
1030
    # Maps a marker character to the inline type names tried when line() meets
    # that character. line() calls the method named 'inline'.$type for each
    # entry, in the order listed, and uses the first acceptable result.
    # line() indexes this array with every character found via
    # $inlineMarkerList without an isset() check, so each character listed
    # there needs an entry here.
    protected $InlineTypes = array(
        '"' => array('SpecialCharacter'),
        '!' => array('Image'),
        '&' => array('SpecialCharacter'),
        '*' => array('Emphasis'),
        ':' => array('Url'),
        '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'),
        '>' => array('SpecialCharacter'),
        '[' => array('Link'),
        '_' => array('Emphasis'),
        '`' => array('Code'),
        '~' => array('Strikethrough'),
        '\\' => array('EscapeSequence'),
    );
1045
1046    # ~
1047
    # Characters that can start an inline element; line() passes this string to
    # strpbrk() to locate the next marker in the remaining text.
    protected $inlineMarkerList = '!"*_&[:<>`~\\';
1049
1050    #
1051    # ~
1052    #
1053
1054    public function line($text, $nonNestables=array())
1055    {
1056        $markup = '';
1057
1058        # $excerpt is based on the first occurrence of a marker
1059
1060        while ($excerpt = strpbrk($text, $this->inlineMarkerList))
1061        {
1062            $marker = $excerpt[0];
1063
1064            $markerPosition = strpos($text, $marker);
1065
1066            $Excerpt = array('text' => $excerpt, 'context' => $text);
1067
1068            foreach ($this->InlineTypes[$marker] as $inlineType)
1069            {
1070                # check to see if the current inline type is nestable in the current context
1071
1072                if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables))
1073                {
1074                    continue;
1075                }
1076
1077                $Inline = $this->{'inline'.$inlineType}($Excerpt);
1078
1079                if ( ! isset($Inline))
1080                {
1081                    continue;
1082                }
1083
1084                # makes sure that the inline belongs to "our" marker
1085
1086                if (isset($Inline['position']) and $Inline['position'] > $markerPosition)
1087                {
1088                    continue;
1089                }
1090
1091                # sets a default inline position
1092
1093                if ( ! isset($Inline['position']))
1094                {
1095                    $Inline['position'] = $markerPosition;
1096                }
1097
1098                # cause the new element to 'inherit' our non nestables
1099
1100                foreach ($nonNestables as $non_nestable)
1101                {
1102                    $Inline['element']['nonNestables'][] = $non_nestable;
1103                }
1104
1105                # the text that comes before the inline
1106                $unmarkedText = substr($text, 0, $Inline['position']);
1107
1108                # compile the unmarked text
1109                $markup .= $this->unmarkedText($unmarkedText);
1110
1111                # compile the inline
1112                $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']);
1113
1114                # remove the examined text
1115                $text = substr($text, $Inline['position'] + $Inline['extent']);
1116
1117                continue 2;
1118            }
1119
1120            # the marker does not belong to an inline
1121
1122            $unmarkedText = substr($text, 0, $markerPosition + 1);
1123
1124            $markup .= $this->unmarkedText($unmarkedText);
1125
1126            $text = substr($text, $markerPosition + 1);
1127        }
1128
1129        $markup .= $this->unmarkedText($text);
1130
1131        return $markup;
1132    }
1133
1134    #
1135    # ~
1136    #
1137
1138    protected function inlineCode($Excerpt)
1139    {
1140        $marker = $Excerpt['text'][0];
1141
1142        if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
1143        {
1144            $text = $matches[2];
1145            $text = preg_replace("/[ ]*\n/", ' ', $text);
1146
1147            return array(
1148                'extent' => strlen($matches[0]),
1149                'element' => array(
1150                    'name' => 'code',
1151                    'text' => $text,
1152                ),
1153            );
1154        }
1155    }
1156
#
# Inline handler for autolinked e-mail addresses written as <user@host>
# or <mailto:user@host>.
#
# The link text is the address exactly as written; the href gets a
# "mailto:" prefix when the author did not supply one. Returns null when
# the excerpt is not an e-mail tag.
#
protected function inlineEmailTag($Excerpt)
{
    # cheap pre-check: without a closing bracket there is nothing to match
    if (strpos($Excerpt['text'], '>') === false)
    {
        return;
    }

    if ( ! preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $found))
    {
        return;
    }

    $address = $found[1];

    # group 2 is only present when the address already carries "mailto:"
    $href = isset($found[2]) ? $address : 'mailto:' . $address;

    return array(
        'extent' => strlen($found[0]),
        'element' => array(
            'name' => 'a',
            'text' => $address,
            'attributes' => array(
                'href' => $href,
            ),
        ),
    );
}
1180
#
# Inline handler for emphasis.
#
# A doubled marker is tried as <strong> first; if that does not match (or
# the marker is single) the <em> pattern is tried. The patterns are looked
# up by marker character in $StrongRegex / $EmRegex. The inner text is
# handed to the 'line' handler so nested inlines are parsed too.
# Returns null when neither pattern matches.
#
protected function inlineEmphasis($Excerpt)
{
    $source = $Excerpt['text'];

    # a lone marker at the very end of the text cannot open anything
    if ( ! isset($source[1]))
    {
        return;
    }

    $marker = $source[0];

    $startsDoubled = ($source[1] === $marker);

    if ($startsDoubled and preg_match($this->StrongRegex[$marker], $source, $found))
    {
        $tag = 'strong';
    }
    elseif (preg_match($this->EmRegex[$marker], $source, $found))
    {
        $tag = 'em';
    }
    else
    {
        return;
    }

    return array(
        'extent' => strlen($found[0]),
        'element' => array(
            'name' => $tag,
            'handler' => 'line',
            'text' => $found[1],
        ),
    );
}
1212
#
# Inline handler for backslash escapes.
#
# When the character after the marker is listed in $specialCharacters, the
# two-character sequence is replaced by that character alone. Otherwise
# null is returned.
#
protected function inlineEscapeSequence($Excerpt)
{
    if ( ! isset($Excerpt['text'][1]))
    {
        return;
    }

    $escaped = $Excerpt['text'][1];

    if ( ! in_array($escaped, $this->specialCharacters))
    {
        return;
    }

    return array(
        'markup' => $escaped,
        'extent' => 2,
    );
}
1223
#
# Inline handler for images: "![alt](src "title")" or the reference form.
#
# The text after the leading "!" is parsed by inlineLink(); the resulting
# link is then turned into an <img> whose 'src' is the link's href and
# whose 'alt' is the link text. Any other link attributes (such as 'title')
# are carried over. Returns null when the excerpt is not an image.
#
protected function inlineImage($Excerpt)
{
    $hasBracket = (isset($Excerpt['text'][1]) and $Excerpt['text'][1] === '[');

    if ( ! $hasBracket)
    {
        return;
    }

    # drop the "!" so the remainder looks like an ordinary link
    $Excerpt['text'] = substr($Excerpt['text'], 1);

    $Link = $this->inlineLink($Excerpt);

    if ($Link === null)
    {
        return;
    }

    $linkAttributes = $Link['element']['attributes'];

    $imageAttributes = array(
        'src' => $linkAttributes['href'],
        'alt' => $Link['element']['text'],
    );

    # array union keeps 'src'/'alt' and appends whatever else the link had
    $imageAttributes += $linkAttributes;

    unset($imageAttributes['href']);

    return array(
        # one extra character for the "!" removed above
        'extent' => $Link['extent'] + 1,
        'element' => array(
            'name' => 'img',
            'attributes' => $imageAttributes,
        ),
    );
}
1257
#
# Inline handler for links.
#
# Supported forms, all starting at the "[" that begins $Excerpt['text']:
#
#   [text](url "title")   inline link
#   [text][ref]           reference link
#   [text][] / [text]     reference link whose label is the text itself
#
# Reference labels are looked up case-insensitively in
# $DefinitionData['Reference']. Returns an inline array ('extent' plus an
# 'a' element whose text is processed by the 'line' handler), or null when
# the excerpt is not a link or the reference is unknown.
#
protected function inlineLink($Excerpt)
{
    $Element = array(
        'name' => 'a',
        'handler' => 'line',
        'nonNestables' => array('Url', 'Link'),
        'text' => null,
        'attributes' => array(
            'href' => null,
            'title' => null,
        ),
    );

    $extent = 0;

    $remainder = $Excerpt['text'];

    # The bracketed text must start at offset 0, because $extent is counted
    # from the beginning of the excerpt. An unanchored pattern could match a
    # later, balanced "[...]" after an unbalanced "[" and the text in between
    # would be swallowed. The anchor requires recursing into group 1 via
    # (?1) rather than the whole pattern via (?R), which would repeat "^".
    if (preg_match('/^(\[((?:[^][]++|(?1))*+)\])/', $remainder, $matches))
    {
        $Element['text'] = $matches[2];

        $extent += strlen($matches[0]);

        $remainder = substr($remainder, $extent);
    }
    else
    {
        return;
    }

    if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches))
    {
        # inline form: destination and optional quoted title

        $Element['attributes']['href'] = $matches[1];

        if (isset($matches[2]))
        {
            # strip the surrounding quotes
            $Element['attributes']['title'] = substr($matches[2], 1, - 1);
        }

        $extent += strlen($matches[0]);
    }
    else
    {
        # reference form: explicit label, empty label or no label at all

        if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches))
        {
            $definition = strlen($matches[1]) ? $matches[1] : $Element['text'];
            $definition = strtolower($definition);

            $extent += strlen($matches[0]);
        }
        else
        {
            $definition = strtolower($Element['text']);
        }

        if ( ! isset($this->DefinitionData['Reference'][$definition]))
        {
            return;
        }

        $Definition = $this->DefinitionData['Reference'][$definition];

        $Element['attributes']['href'] = $Definition['url'];
        $Element['attributes']['title'] = $Definition['title'];
    }

    return array(
        'extent' => $extent,
        'element' => $Element,
    );
}
1329
#
# Inline handler that passes raw HTML through untouched.
#
# Disabled when markup is escaped or safe mode is on. Recognises, in this
# order: a closing tag, an HTML comment, and an opening (or self-closing)
# tag with optional attributes. Returns the matched markup verbatim, or
# null when nothing matches.
#
protected function inlineMarkup($Excerpt)
{
    if ($this->markupEscaped or $this->safeMode)
    {
        return;
    }

    $source = $Excerpt['text'];

    # every recognised form ends with ">"
    if (strpos($source, '>') === false)
    {
        return;
    }

    $next = $source[1];

    # collect the patterns that are worth trying for this second character
    $candidates = array();

    if ($next === '/')
    {
        $candidates[] = '/^<\/\w[\w-]*[ ]*>/s';
    }

    if ($next === '!')
    {
        $candidates[] = '/^<!---?[^>-](?:-?[^-])*-->/s';
    }

    if ($next !== ' ')
    {
        $candidates[] = '/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s';
    }

    foreach ($candidates as $pattern)
    {
        if (preg_match($pattern, $source, $found))
        {
            return array(
                'markup' => $found[0],
                'extent' => strlen($found[0]),
            );
        }
    }
}
1361
#
# Inline handler that converts HTML-significant characters to entities.
#
# An ampersand that does not already start an entity becomes "&amp;";
# ">", "<" and '"' become "&gt;", "&lt;" and "&quot;". Anything else
# (including an ampersand that begins an entity) yields null.
#
protected function inlineSpecialCharacter($Excerpt)
{
    $first = $Excerpt['text'][0];

    if ($first === '&')
    {
        # leave existing entities such as "&amp;" or "&#169;" alone
        if (preg_match('/^&#?\w+;/', $Excerpt['text']))
        {
            return;
        }

        return array(
            'markup' => '&amp;',
            'extent' => 1,
        );
    }

    $entityNames = array('>' => 'gt', '<' => 'lt', '"' => 'quot');

    if ( ! isset($entityNames[$first]))
    {
        return;
    }

    return array(
        'markup' => '&'.$entityNames[$first].';',
        'extent' => 1,
    );
}
1382
#
# Inline handler for strikethrough: "~~text~~" becomes a <del> element.
#
# The text may not start or end with whitespace. Its content is processed
# by the 'line' handler. Returns null when the excerpt does not match.
#
protected function inlineStrikethrough($Excerpt)
{
    $source = $Excerpt['text'];

    # needs a second "~" right after the marker
    if ( ! isset($source[1]) or $source[1] !== '~')
    {
        return;
    }

    if ( ! preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $source, $found))
    {
        return;
    }

    return array(
        'extent' => strlen($found[0]),
        'element' => array(
            'name' => 'del',
            'text' => $found[1],
            'handler' => 'line',
        ),
    );
}
1402
#
# Inline handler that autolinks bare http(s) URLs.
#
# It is triggered on the ":" of "://", so the excerpt text starts in the
# middle of the URL. The URL itself is therefore searched for in
# $Excerpt['context'], and its offset there is reported as 'position'
# alongside the usual 'extent'. Does nothing unless URL linking is enabled.
#
protected function inlineUrl($Excerpt)
{
    if ($this->urlsLinked !== true)
    {
        return;
    }

    # expect "://" at the marker: the third character has to be a slash
    if ( ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/')
    {
        return;
    }

    if ( ! preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $found, PREG_OFFSET_CAPTURE))
    {
        return;
    }

    list($url, $offset) = $found[0];

    return array(
        'extent' => strlen($url),
        'position' => $offset,
        'element' => array(
            'name' => 'a',
            'text' => $url,
            'attributes' => array(
                'href' => $url,
            ),
        ),
    );
}
1429
#
# Inline handler for autolinks written as <scheme://...>.
#
# The URL between the angle brackets is used both as link text and as
# href. Returns null when the excerpt is not such a tag.
#
protected function inlineUrlTag($Excerpt)
{
    $source = $Excerpt['text'];

    # cheap pre-check before running the pattern
    if (strpos($source, '>') === false)
    {
        return;
    }

    if ( ! preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $source, $found))
    {
        return;
    }

    $target = $found[1];

    return array(
        'extent' => strlen($found[0]),
        'element' => array(
            'name' => 'a',
            'text' => $target,
            'attributes' => array(
                'href' => $target,
            ),
        ),
    );
}
1448
1449    # ~
1450
#
# Post-processes text that contains no inline markers.
#
# With breaks enabled every line break becomes "<br />". Otherwise only a
# line ending in two or more spaces or in a backslash produces a break,
# and a single trailing space before a line break is dropped.
#
protected function unmarkedText($text)
{
    if ($this->breaksEnabled)
    {
        return preg_replace('/[ ]*\n/', "<br />\n", $text);
    }

    # hard breaks: two or more trailing spaces, or a trailing backslash
    $withBreaks = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />\n", $text);

    return str_replace(" \n", "\n", $withBreaks);
}
1465
1466    #
1467    # Handlers
1468    #
1469
#
# Renders one element array as an HTML string.
#
# Recognised keys:
#   'name'          tag name (required)
#   'attributes'    name => value map; null values are skipped, the rest
#                   are escaped
#   'text'          content; escaped unless a 'handler' is given
#   'rawHtml'       content emitted verbatim (only consulted when 'text'
#                   is absent); in safe mode it is escaped unless
#                   'allowRawHtmlInSafeMode' is truthy
#   'handler'       name of the method that turns the content into markup
#   'nonNestables'  passed to the handler as its second argument
#
# Without content the element is rendered self-closing. In safe mode the
# element is passed through sanitiseElement() first.
#
protected function element(array $Element)
{
    if ($this->safeMode)
    {
        $Element = $this->sanitiseElement($Element);
    }

    $html = '<'.$Element['name'];

    $attributes = isset($Element['attributes']) ? $Element['attributes'] : array();

    foreach ($attributes as $attributeName => $attributeValue)
    {
        if ($attributeValue !== null)
        {
            $html .= ' '.$attributeName.'="'.self::escape($attributeValue).'"';
        }
    }

    $content = null;
    $emitVerbatim = false;

    if (isset($Element['text']))
    {
        $content = $Element['text'];
    }
    // very strongly consider an alternative if you're writing an
    // extension
    elseif (isset($Element['rawHtml']))
    {
        $content = $Element['rawHtml'];
        $emitVerbatim = ( ! $this->safeMode or ! empty($Element['allowRawHtmlInSafeMode']));
    }

    # no content at all: self-closing tag
    if ($content === null)
    {
        return $html.' />';
    }

    $html .= '>';

    if (isset($Element['handler']))
    {
        $nonNestables = isset($Element['nonNestables']) ? $Element['nonNestables'] : array();

        $html .= $this->{$Element['handler']}($content, $nonNestables);
    }
    elseif ($emitVerbatim)
    {
        $html .= $content;
    }
    else
    {
        $html .= self::escape($content, true);
    }

    return $html.'</'.$Element['name'].'>';
}
1538
#
# Renders a list of element arrays, each on its own line.
#
# Every rendered element is preceded by a line break and the result ends
# with one more, so an empty list yields a single "\n".
#
protected function elements(array $Elements)
{
    $rendered = array();

    foreach ($Elements as $Element)
    {
        $rendered[] = "\n" . $this->element($Element);
    }

    return implode('', $rendered) . "\n";
}
1552
1553    # ~
1554
#
# Handler for list item content.
#
# The item's lines are parsed as blocks. When the item contains no blank
# line and its markup starts with a paragraph, the first "<p>"..."</p>"
# wrapper is removed so that tight list items are not wrapped in
# paragraphs. In every other case the block markup is returned as is.
#
protected function li($lines)
{
    $markup = $this->lines($lines);

    $trimmedMarkup = trim($markup);

    if (in_array('', $lines) or substr($trimmedMarkup, 0, 3) !== '<p>')
    {
        return $markup;
    }

    $unwrapped = substr($trimmedMarkup, 3);

    $position = strpos($unwrapped, "</p>");

    # The markup may start with a "<p>" that has no closing tag (raw HTML
    # written by the author). strpos() then returns false, which
    # substr_replace() would treat as offset 0 and cut away the start of the
    # content. Leave such markup untouched.
    if ($position === false)
    {
        return $markup;
    }

    return substr_replace($unwrapped, '', $position, 4);
}
1573
1574    #
1575    # Deprecated Methods
1576    #
1577
#
# Deprecated alias of text(): converts Markdown text to markup.
#
function parse($text)
{
    return $this->text($text);
}
1584
#
# Makes an element array safe to render; element() calls this in safe mode.
#
# For <a> and <img> the URL attribute (href / src) is passed through
# filterUnsafeUrlInAttribute(). After that every attribute whose name is
# not a plain identifier, or whose name starts with "on" (event handlers),
# is removed. Returns the cleaned element array.
#
protected function sanitiseElement(array $Element)
{
    $wellFormedName = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';

    # element name => attribute that carries its URL
    $urlAttributeOf = array(
        'a'   => 'href',
        'img' => 'src',
    );

    $tag = $Element['name'];

    if (isset($urlAttributeOf[$tag]))
    {
        $Element = $this->filterUnsafeUrlInAttribute($Element, $urlAttributeOf[$tag]);
    }

    if (empty($Element['attributes']))
    {
        return $Element;
    }

    foreach ($Element['attributes'] as $att => $unused)
    {
        # badly parsed attribute names and on* event attributes are both dropped
        $isAcceptable = (preg_match($wellFormedName, $att) and ! self::striAtStart($att, 'on'));

        if ( ! $isAcceptable)
        {
            unset($Element['attributes'][$att]);
        }
    }

    return $Element;
}
1617
#
# Neutralises a URL attribute whose scheme is not whitelisted.
#
# When the value of $Element['attributes'][$attribute] starts (case-
# insensitively) with one of the entries of $safeLinksWhitelist, the
# element is returned unchanged. Otherwise every ":" in the value is
# replaced by "%3A", so the value can no longer carry a scheme.
#
# An element that does not have the attribute, or has it set to null, is
# returned unchanged.
#
protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
{
    # Nothing to filter. Without this guard the lookup below would raise an
    # undefined-index notice and the attribute would be created with an
    # empty value, adding e.g. a spurious href="" to the rendered element.
    if ( ! isset($Element['attributes'][$attribute]))
    {
        return $Element;
    }

    $url = $Element['attributes'][$attribute];

    foreach ($this->safeLinksWhitelist as $scheme)
    {
        if (self::striAtStart($url, $scheme))
        {
            return $Element;
        }
    }

    $Element['attributes'][$attribute] = str_replace(':', '%3A', $url);

    return $Element;
}
1632
1633    #
1634    # Static Methods
1635    #
1636
#
# Escapes text for HTML output (UTF-8).
#
# By default both single and double quotes are converted, which is what
# attribute values need. Pass true for $allowQuotes to leave quotes as
# they are, as is done for element content.
#
protected static function escape($text, $allowQuotes = false)
{
    $quoteStyle = ENT_QUOTES;

    if ($allowQuotes)
    {
        $quoteStyle = ENT_NOQUOTES;
    }

    return htmlspecialchars($text, $quoteStyle, 'UTF-8');
}
1641
#
# Tells whether $string begins with $needle, ignoring letter case.
#
# A needle longer than the string never matches.
#
protected static function striAtStart($string, $needle)
{
    $needleLength = strlen($needle);

    if ($needleLength > strlen($string))
    {
        return false;
    }

    $prefix = substr($string, 0, $needleLength);

    return strtolower($prefix) === strtolower($needle);
}
1655
#
# Returns a shared parser instance, creating it on first use.
#
# Instances are remembered per calling class and per $name. Keying the
# registry by class matters because the instance is created with late
# static binding: with a registry keyed by name alone, a subclass asking
# for a name already used by another class would be handed an object of
# that other class.
#
static function instance($name = 'default')
{
    $class = get_called_class();

    if (isset(self::$instances[$class][$name]))
    {
        return self::$instances[$class][$name];
    }

    $instance = new static();

    self::$instances[$class][$name] = $instance;

    return $instance;
}

# registry used by instance(): class name => instance name => parser
private static $instances = array();
1671
1672    #
1673    # Fields
1674    #
1675
# definitions gathered for the text being parsed; text() resets it and
# inlineLink() looks references up under the 'Reference' key
protected $DefinitionData;

#
# Read-Only
#

# characters that inlineEscapeSequence() accepts after a backslash
protected $specialCharacters = array(
    '\\', '`', '*', '_',
    '{', '}', '[', ']', '(', ')',
    '>', '#', '+', '-', '.', '!', '|',
);

# patterns used by inlineEmphasis() for a doubled marker, keyed by marker
protected $StrongRegex = array(
    '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s',
    '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us',
);

# patterns used by inlineEmphasis() for a single marker, keyed by marker
protected $EmRegex = array(
    '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
    '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
);

# pattern fragment for one HTML attribute; inlineMarkup() embeds it
protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?';

# names of void elements (consumers are outside this part of the file)
protected $voidElements = array(
    'area', 'base', 'br', 'col', 'command', 'embed', 'hr',
    'img', 'input', 'link', 'meta', 'param', 'source',
);

# names of text-level elements (consumers are outside this part of the file)
protected $textLevelElements = array(
    'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
    'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
    'i', 'rp', 'del', 'code', 'strike', 'marquee',
    'q', 'rt', 'ins', 'font', 'strong',
    's', 'tt', 'kbd', 'mark',
    'u', 'xm', 'sub', 'nobr',
    'sup', 'ruby',
    'var', 'span',
    'wbr', 'time',
);
1712}
1713