xref: /dokuwiki/inc/parser/parser.php (revision 0cecf9d507451346a32ddf45a85b425784fbb0f8)
1<?php
2
3if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
4
5require_once DOKU_INC . 'inc/parser/lexer.php';
6require_once DOKU_INC . 'inc/parser/handler.php';
7
8//-------------------------------------------------------------------
9
/**
* Sets up the Lexer with modes and points it to the Handler
* For an intro to the Lexer see: wiki:parser
*/
class Doku_Parser {

    // Handler object; handed to the Lexer on creation and finalized by parse()
    var $Handler;

    // Lexer shared by every registered mode; created by addBaseMode() if unset
    var $Lexer;

    // Registered mode objects keyed by mode name, in registration order
    var $modes = array();

    // TRUE once connectModes() has completed
    var $connected = FALSE;

    /**
    * Registers the 'base' mode and creates the Lexer if none was assigned.
    */
    function addBaseMode() {
        // Plain assignment: "= & new" is a parse error from PHP 7 onwards
        $this->modes['base'] = new Doku_Parser_Mode_Base();
        if ( !$this->Lexer ) {
            $this->Lexer = new Doku_Lexer($this->Handler,'base', TRUE);
        }
        $this->modes['base']->Lexer = & $this->Lexer;
    }

    /**
    * Registers a mode under the given name and hands it the shared Lexer.
    *
    * PHP preserves order of associative elements
    * Mode sequence is important
    *
    * @param string $name  name the mode is registered under
    * @param object $Mode  mode object (passed by reference)
    */
    function addMode($name, & $Mode) {
        // The base mode is always registered first
        if ( !isset($this->modes['base']) ) {
            $this->addBaseMode();
        }
        $Mode->Lexer = & $this->Lexer;
        $this->modes[$name] = & $Mode;
    }

    /**
    * Connects every registered mode to each mode that accepts it.
    * Runs only once; later calls return immediately.
    */
    function connectModes() {

        if ( $this->connected ) {
            return;
        }

        foreach ( array_keys($this->modes) as $mode ) {

            // Base isn't connected to anything.
            // Strict comparison: a loose one would also skip integer key 0 on PHP < 8
            if ( $mode === 'base' ) {
                continue;
            }

            $this->modes[$mode]->preConnect();

            foreach ( array_keys($this->modes) as $cm ) {

                if ( $this->modes[$cm]->accepts($mode) ) {
                    $this->modes[$mode]->connectTo($cm);
                }

            }

            $this->modes[$mode]->postConnect();
        }

        $this->connected = TRUE;
    }

    /**
    * Runs the Lexer over the document and returns the Handler's calls.
    *
    * @param string $doc  raw document text
    * @return mixed  $this->Handler->calls, or FALSE when no Lexer is available
    */
    function parse($doc) {
        if ( !$this->Lexer ) {
            return FALSE;
        }

        $this->connectModes();
        // Convert CRLF to LF and pad the doc with a newline on both ends
        $doc = "\n".str_replace("\r\n","\n",$doc)."\n";
        $this->Lexer->parse($doc);
        $this->Handler->__finalize();
        return $this->Handler->calls;
    }

}
87
88//-------------------------------------------------------------------
/**
* Parent class of all parser modes.
*
* This class and its subclasses are used to reduce the effort
* required to register modes with the Lexer. For performance
* they could perhaps be eliminated later, or the Parser could
* be serialized to a file once all modes are registered.
*/
class Doku_Parser_Mode {

    // Lexer the mode registers its patterns with; assigned from outside
    var $Lexer;

    // Names of the modes this mode accepts (see accepts())
    var $allowedModes = array();

    /**
    * Called before any calls to connectTo. Does nothing by default.
    */
    function preConnect() {
    }

    /**
    * Does nothing by default; subclasses override it.
    *
    * @param string $mode  name of a mode that accepts this one
    */
    function connectTo($mode) {
    }

    /**
    * Called after all calls to connectTo. Does nothing by default.
    */
    function postConnect() {
    }

    /**
    * Tells whether the named mode is listed in $allowedModes.
    *
    * @param string $mode  mode name to look up
    * @return bool
    */
    function accepts($mode) {
        $permitted = $this->allowedModes;
        return in_array($mode, $permitted);
    }

}
116
117//-------------------------------------------------------------------
/**
* The 'base' mode: accepts every group of modes.
*/
class Doku_Parser_Mode_Base extends Doku_Parser_Mode {

    /**
    * Fills $allowedModes with the names from all mode groups.
    */
    function __construct() {

        $this->allowedModes = array_merge (
                Doku_Parser_BlockContainers(),
                Doku_Parser_BaseOnly(),
                Doku_Parser_Paragraphs(),
                Doku_Parser_Formatting(),
                Doku_Parser_Substition(),
                Doku_Parser_Protected(),
                Doku_Parser_Disabled()
            );
    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    */
    function Doku_Parser_Mode_Base() {
        $this->__construct();
    }
}
133
134//-------------------------------------------------------------------
/**
* Footnote mode: text between (( and )).
*/
class Doku_Parser_Mode_Footnote extends Doku_Parser_Mode {

    /**
    * Fills $allowedModes with the modes accepted inside a footnote.
    */
    function __construct() {

        $this->allowedModes = array_merge (
                Doku_Parser_BlockContainers(),
                Doku_Parser_Formatting(),
                Doku_Parser_Substition(),
                Doku_Parser_Protected(),
                Doku_Parser_Disabled()
            );

    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    */
    function Doku_Parser_Mode_Footnote() {
        $this->__construct();
    }

    /**
    * Registers the entry pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // \x28\x28 is "((" ; the lookahead requires a "))" later on
        $this->Lexer->addEntryPattern(
            '\x28\x28(?=.*\x29\x29)',$mode,'footnote'
            );
    }

    /**
    * Registers "))" (\x29\x29) as the exit pattern.
    */
    function postConnect() {
        $this->Lexer->addExitPattern(
            '\x29\x29','footnote'
            );

    }

}
163
164//-------------------------------------------------------------------
/**
* Header mode: lines wrapped in runs of '=' characters.
*/
class Doku_Parser_Mode_Header extends Doku_Parser_Mode {

    /**
    * Registers the header patterns as special patterns of the 'base' mode.
    */
    function preConnect() {

        // First the special case matching six or more '=' on each side,
        // then the exact counts five down to two.
        $quantifiers = array('{6,}', '{5}', '{4}', '{3}', '{2}');

        foreach ( $quantifiers as $count ) {
            $regex = '[ \t]*='.$count.'[^\n]+='.$count.'[ \t]*\n';
            $this->Lexer->addSpecialPattern($regex, 'base', 'header');
        }
    }

}
187
188//-------------------------------------------------------------------
/**
* NoToc mode: the literal ~~NOTOC~~ marker.
*/
class Doku_Parser_Mode_NoToc extends Doku_Parser_Mode {

    /**
    * Registers the marker as a special pattern of the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $marker = '~~NOTOC~~';
        $this->Lexer->addSpecialPattern($marker, $mode, 'notoc');
    }

}
196
197//-------------------------------------------------------------------
/**
* Linebreak mode: two backslashes followed by whitespace.
*/
class Doku_Parser_Mode_Linebreak extends Doku_Parser_Mode {

    /**
    * Registers the linebreak pattern as a special pattern of the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // \x5C is the backslash character
        $regex = '\x5C{2}\s';
        $this->Lexer->addSpecialPattern($regex, $mode, 'linebreak');
    }
}
204
205//-------------------------------------------------------------------
/**
* Eol mode: a single newline.
*/
class Doku_Parser_Mode_Eol extends Doku_Parser_Mode {

    /**
    * Registers the newline as a special pattern of the given mode,
    * except for the 'listblock' and 'table' modes.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $excluded = array('listblock','table');
        if ( !in_array($mode, $excluded) ) {
            $this->Lexer->addSpecialPattern('\n',$mode,'eol');
        }
    }
}
216
217//-------------------------------------------------------------------
/**
* HR mode: a line holding four or more dashes, optionally
* surrounded by spaces or tabs.
*/
class Doku_Parser_Mode_HR extends Doku_Parser_Mode {

    /**
    * Registers the rule pattern as a special pattern of the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $regex = '\n[ \t]*-{4,}[ \t]*\n';
        $this->Lexer->addSpecialPattern($regex, $mode, 'hr');
    }

}
225
226//-------------------------------------------------------------------
/**
* Formatting mode: one instance per formatting type (strong, emphasis, ...).
*/
class Doku_Parser_Mode_Formatting extends Doku_Parser_Mode {

    // Formatting type this instance handles; a key of $formatting
    var $type;

    // Entry and exit patterns per formatting type. Each entry pattern
    // carries a lookahead requiring the closing markup later on.
    var $formatting = array (
        'strong' => array (
            'entry'=>'\*\*(?=.*\*\*)',
            'exit'=>'\*\*',
            ),

        'emphasis'=> array (
            'entry'=>'//(?=.*//)',
            'exit'=>'//',
            ),

        'underline'=> array (
            'entry'=>'__(?=.*__)',
            'exit'=>'__',
            ),

        'monospace'=> array (
            'entry'=>'\x27\x27(?=.*\x27\x27)',
            'exit'=>'\x27\x27',
            ),

        'subscript'=> array (
            'entry'=>'<sub>(?=.*\x3C/sub\x3E)',
            'exit'=>'</sub>',
            ),

        'superscript'=> array (
            'entry'=>'<sup>(?=.*\x3C/sup\x3E)',
            'exit'=>'</sup>',
            ),

        'deleted'=> array (
            'entry'=>'<del>(?=.*\x3C/del\x3E)',
            'exit'=>'</del>',
            ),
        );

    /**
    * Stores the formatting type and fills $allowedModes.
    * Raises an E_USER_WARNING for an unknown type but still continues.
    *
    * @param string $type  formatting type, expected to be a key of $formatting
    */
    function __construct($type) {

        if ( !array_key_exists($type, $this->formatting) ) {
            trigger_error('Invalid formatting type '.$type, E_USER_WARNING);
        }

        $this->type = $type;

        // The formatting list is requested without this instance's own type
        $this->allowedModes = array_merge (
                Doku_Parser_Formatting($type),
                Doku_Parser_Substition(),
                Doku_Parser_Disabled()
            );

    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    *
    * @param string $type  formatting type
    */
    function Doku_Parser_Mode_Formatting($type) {
        $this->__construct($type);
    }

    /**
    * Registers this type's entry pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {

        // Can't nest formatting in itself
        if ( $mode == $this->type ) {
            return;
        }

        $this->Lexer->addEntryPattern(
                $this->formatting[$this->type]['entry'],
                $mode,
                $this->type
            );
    }

    /**
    * Registers this type's exit pattern.
    */
    function postConnect() {

        $this->Lexer->addExitPattern(
            $this->formatting[$this->type]['exit'],
            $this->type
            );

    }
}
307
308//-------------------------------------------------------------------
/**
* ListBlock mode: lines indented by two or more spaces, or one or
* more tabs, followed by '-' or '*'.
*/
class Doku_Parser_Mode_ListBlock extends Doku_Parser_Mode {

    /**
    * Fills $allowedModes with the modes accepted inside a list block.
    */
    function __construct() {

        $this->allowedModes = array_merge (
                Doku_Parser_Formatting(),
                Doku_Parser_Substition(),
                Doku_Parser_Disabled()
            );
        $this->allowedModes[] = 'footnote';
        $this->allowedModes[] = 'preformatted';
        $this->allowedModes[] = 'unformatted';

    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    */
    function Doku_Parser_Mode_ListBlock() {
        $this->__construct();
    }

    /**
    * Registers the entry patterns in the given mode and the same
    * patterns as internal patterns of the 'listblock' mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n {2,}[\-\*]',$mode,'listblock');
        $this->Lexer->addEntryPattern('\n\t{1,}[\-\*]',$mode,'listblock');

        // NOTE(review): these are re-added on every connectTo() call,
        // i.e. once per accepting mode - confirm the Lexer tolerates duplicates
        $this->Lexer->addPattern('\n {2,}[\-\*]','listblock');
        $this->Lexer->addPattern('\n\t{1,}[\-\*]','listblock');

    }

    /**
    * Registers a newline as the exit pattern.
    */
    function postConnect() {
        $this->Lexer->addExitPattern('\n','listblock');
    }
}
337
338//-------------------------------------------------------------------
/**
* Table mode: lines starting with '^' or '|'.
*/
class Doku_Parser_Mode_Table extends Doku_Parser_Mode {

    /**
    * Fills $allowedModes with the modes accepted inside a table.
    */
    function __construct() {

        $this->allowedModes = array_merge (
                Doku_Parser_Formatting(),
                Doku_Parser_Substition(),
                Doku_Parser_Disabled()
            );
        $this->allowedModes[] = 'footnote';
        $this->allowedModes[] = 'preformatted';
        $this->allowedModes[] = 'unformatted';
    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    */
    function Doku_Parser_Mode_Table() {
        $this->__construct();
    }

    /**
    * Registers the two entry patterns in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n\^',$mode,'table');
        $this->Lexer->addEntryPattern('\n\|',$mode,'table');
    }

    /**
    * Registers the internal patterns of the 'table' mode and a
    * newline as its exit pattern.
    */
    function postConnect() {
        $this->Lexer->addPattern('\n\^','table');
        $this->Lexer->addPattern('\n\|','table');
        $this->Lexer->addPattern(' {2,}','table');
        $this->Lexer->addPattern('\^','table');
        $this->Lexer->addPattern('\|','table');
        $this->Lexer->addExitPattern('\n','table');
    }
}
367
368//-------------------------------------------------------------------
/**
* Unformatted mode: text between <nowiki> tags or between %% markers.
*/
class Doku_Parser_Mode_Unformatted extends Doku_Parser_Mode {

    /**
    * Registers both entry patterns in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $lexer = & $this->Lexer;
        $lexer->addEntryPattern(
            '<nowiki>(?=.*\x3C/nowiki\x3E)', $mode, 'unformatted'
        );
        $lexer->addEntryPattern(
            '%%(?=.*%%)', $mode, 'unformattedalt'
        );
    }

    /**
    * Registers both exit patterns and maps 'unformattedalt'
    * onto the 'unformatted' handler.
    */
    function postConnect() {
        $lexer = & $this->Lexer;
        $lexer->addExitPattern('</nowiki>', 'unformatted');
        $lexer->addExitPattern('%%', 'unformattedalt');
        $lexer->mapHandler('unformattedalt', 'unformatted');
    }

}
383
384//-------------------------------------------------------------------
/**
* PHP mode: text between <php> and </php>.
*/
class Doku_Parser_Mode_PHP extends Doku_Parser_Mode {

    /**
    * Registers the entry pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // The lookahead requires a closing </php> later on
        $entry = '<php>(?=.*\x3C/php\x3E)';
        $this->Lexer->addEntryPattern($entry, $mode, 'php');
    }

    /**
    * Registers the closing tag as the exit pattern.
    */
    function postConnect() {
        $exit = '</php>';
        $this->Lexer->addExitPattern($exit, 'php');
    }

}
396
397//-------------------------------------------------------------------
/**
* HTML mode: text between <html> and </html>.
*/
class Doku_Parser_Mode_HTML extends Doku_Parser_Mode {

    /**
    * Registers the entry pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // The lookahead requires a closing </html> later on
        $entry = '<html>(?=.*\x3C/html\x3E)';
        $this->Lexer->addEntryPattern($entry, $mode, 'html');
    }

    /**
    * Registers the closing tag as the exit pattern.
    */
    function postConnect() {
        $exit = '</html>';
        $this->Lexer->addExitPattern($exit, 'html');
    }

}
409
410//-------------------------------------------------------------------
/**
* Preformatted mode: lines indented by two spaces or a tab.
*/
class Doku_Parser_Mode_Preformatted extends Doku_Parser_Mode {

    /**
    * Registers the entry patterns in the given mode and the
    * continuation patterns inside the 'preformatted' mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // Two-space indent first, then tab indent
        $indents = array('\n  ', '\n\t');

        // Has hard coded awareness of lists: the negative lookahead
        // rejects an indent followed by '*' or '-'
        foreach ( $indents as $indent ) {
            $this->Lexer->addEntryPattern($indent.'(?![\*\-])',$mode,'preformatted');
        }

        // How to effect a sub pattern with the Lexer!
        foreach ( $indents as $indent ) {
            $this->Lexer->addPattern($indent,'preformatted');
        }
    }

    /**
    * Registers a newline as the exit pattern.
    */
    function postConnect() {
        $this->Lexer->addExitPattern('\n','preformatted');
    }

}
429
430//-------------------------------------------------------------------
/**
* Code mode: text from "<code" up to </code>.
*/
class Doku_Parser_Mode_Code extends Doku_Parser_Mode {

    /**
    * Registers the entry pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // The entry pattern is "<code" without the closing '>' ;
        // the lookahead requires a </code> later on
        $entry = '<code(?=.*\x3C/code\x3E)';
        $this->Lexer->addEntryPattern($entry, $mode, 'code');
    }

    /**
    * Registers the closing tag as the exit pattern.
    */
    function postConnect() {
        $exit = '</code>';
        $this->Lexer->addExitPattern($exit, 'code');
    }

}
442
443//-------------------------------------------------------------------
/**
* File mode: text between <file> and </file>.
*/
class Doku_Parser_Mode_File extends Doku_Parser_Mode {

    /**
    * Registers the entry pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // The lookahead requires a closing </file> later on
        $entry = '<file>(?=.*\x3C/file\x3E)';
        $this->Lexer->addEntryPattern($entry, $mode, 'file');
    }

    /**
    * Registers the closing tag as the exit pattern.
    */
    function postConnect() {
        $exit = '</file>';
        $this->Lexer->addExitPattern($exit, 'file');
    }

}
455
456//-------------------------------------------------------------------
/**
* Quote mode: lines starting with one or more '>' characters.
*/
class Doku_Parser_Mode_Quote extends Doku_Parser_Mode {

    /**
    * Fills $allowedModes with the modes accepted inside a quote.
    */
    function __construct() {

        $this->allowedModes = array_merge (
                Doku_Parser_Formatting(),
                Doku_Parser_Substition(),
                Doku_Parser_Disabled()
            );
        $this->allowedModes[] = 'footnote';
        $this->allowedModes[] = 'preformatted';
        $this->allowedModes[] = 'unformatted';
    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    */
    function Doku_Parser_Mode_Quote() {
        $this->__construct();
    }

    /**
    * Registers the entry pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n>{1,}',$mode,'quote');
    }

    /**
    * Registers the same pattern inside the 'quote' mode and a
    * newline as the exit pattern.
    */
    function postConnect() {
        $this->Lexer->addPattern('\n>{1,}','quote');
        $this->Lexer->addExitPattern('\n','quote');
    }

}
481
482//-------------------------------------------------------------------
/**
* Acronym mode: matches any acronym from a given list.
*/
class Doku_Parser_Mode_Acronym extends Doku_Parser_Mode {
    // A list
    var $acronyms = array();
    // Alternation of all acronyms, built by preConnect()
    var $pattern = '';

    /**
    * @param array $acronyms  list of acronyms to match
    */
    function __construct($acronyms) {
        $this->acronyms = $acronyms;
    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    *
    * @param array $acronyms  list of acronyms to match
    */
    function Doku_Parser_Mode_Acronym($acronyms) {
        $this->__construct($acronyms);
    }

    /**
    * Appends every acronym, escaped and wrapped in word-boundary
    * assertions, to $pattern, joined with '|'.
    */
    function preConnect() {
        $sep = '';
        foreach ( $this->acronyms as $acronym ) {
            $this->pattern .= $sep.'(?<=\b)'.Doku_Lexer_Escape($acronym).'(?=\b)';
            $sep = '|';
        }
    }

    /**
    * Registers the pattern in the given mode, unless it is empty.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'acronym');
        }
    }

}
507
508//-------------------------------------------------------------------
/**
* Smiley mode: matches any smiley from a given list.
*/
class Doku_Parser_Mode_Smiley extends Doku_Parser_Mode {
    // A list
    var $smileys = array();
    // Alternation of all smileys, built by preConnect()
    var $pattern = '';

    /**
    * @param array $smileys  list of smileys to match
    */
    function __construct($smileys) {
        $this->smileys = $smileys;
    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    *
    * @param array $smileys  list of smileys to match
    */
    function Doku_Parser_Mode_Smiley($smileys) {
        $this->__construct($smileys);
    }

    /**
    * Appends every escaped smiley to $pattern, joined with '|'.
    */
    function preConnect() {
        $sep = '';
        foreach ( $this->smileys as $smiley ) {
            $this->pattern .= $sep.Doku_Lexer_Escape($smiley);
            $sep = '|';
        }
    }

    /**
    * Registers the pattern in the given mode, unless it is empty.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'smiley');
        }
    }

}
533
534//-------------------------------------------------------------------
/**
* Wordblock mode: matches any word from a given list, ignoring case.
*/
class Doku_Parser_Mode_Wordblock extends Doku_Parser_Mode {
    // A list
    var $badwords = array();
    // Alternation of all bad words, built by preConnect()
    var $pattern = '';

    /**
    * @param array $badwords  list of words to match
    */
    function __construct($badwords) {
        $this->badwords = $badwords;
    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    *
    * @param array $badwords  list of words to match
    */
    function Doku_Parser_Mode_Wordblock($badwords) {
        $this->__construct($badwords);
    }

    /**
    * Appends every escaped word to $pattern, joined with '|'. Each word is
    * wrapped in word-boundary assertions and a case-insensitive (?i) section.
    */
    function preConnect() {

        if ( count($this->badwords) == 0 ) {
            return;
        }

        $sep = '';
        foreach ( $this->badwords as $badword ) {
            $this->pattern .= $sep.'(?<=\b)(?i)'.Doku_Lexer_Escape($badword).'(?-i)(?=\b)';
            $sep = '|';
        }

    }

    /**
    * Registers the pattern in the given mode, unless it is empty.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'wordblock');
        }
    }

}
565
566//-------------------------------------------------------------------
/**
* Entity mode: matches any entity from a given list.
*
* @TODO Quotes and 640x480 are not supported - just straight replacements here
*/
class Doku_Parser_Mode_Entity extends Doku_Parser_Mode {
    // A list
    var $entities = array();
    // Alternation of all entities, built by preConnect()
    var $pattern = '';

    /**
    * @param array $entities  list of entities to match
    */
    function __construct($entities) {
        $this->entities = $entities;
    }

    /**
    * PHP 4 style constructor. PHP 8 no longer treats it as a constructor,
    * so the work lives in __construct(); kept for explicit callers.
    *
    * @param array $entities  list of entities to match
    */
    function Doku_Parser_Mode_Entity($entities) {
        $this->__construct($entities);
    }

    /**
    * Appends every escaped entity to $pattern, joined with '|'.
    */
    function preConnect() {
        $sep = '';
        foreach ( $this->entities as $entity ) {
            $this->pattern .= $sep.Doku_Lexer_Escape($entity);
            $sep = '|';
        }
    }

    /**
    * Registers the pattern in the given mode, unless it is empty.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'entity');
        }
    }

}
594
595//-------------------------------------------------------------------
// Implements the 640x480 replacement
class Doku_Parser_Mode_MultiplyEntity extends Doku_Parser_Mode {

    /**
    * Registers the "digits x digits" pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {

        // [xX], not [x|X]: inside a character class '|' is a literal,
        // so the old class also matched input such as "3|4"
        $this->Lexer->addSpecialPattern(
                    '(?<=\b)\d+[xX]\d+(?=\b)',$mode,'multiplyentity'
                );

    }

}
608
609//-------------------------------------------------------------------
/**
* Quotes mode: opening and closing single and double quotes.
*/
class Doku_Parser_Mode_Quotes extends Doku_Parser_Mode {

    /**
    * Registers the four quote patterns in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {

        // Opening: quote after whitespace and before non-whitespace.
        // Closing: quote after non-whitespace.
        // Registration order is kept as listed.
        $quotes = array(
            'singlequoteopening' => '(?<=\s)\'(?=\S)',
            'singlequoteclosing' => '(?<=\S)\'',
            'doublequoteopening' => '(?<=\s)"(?=\S)',
            'doublequoteclosing' => '(?<=\S)"',
        );

        foreach ( $quotes as $name => $regex ) {
            $this->Lexer->addSpecialPattern($regex, $mode, $name);
        }

    }

}
630
631//-------------------------------------------------------------------
/**
* CamelCaseLink mode: words written in CamelCase.
*/
class Doku_Parser_Mode_CamelCaseLink extends Doku_Parser_Mode {

    /**
    * Registers the CamelCase pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $regex = '\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b';
        $this->Lexer->addSpecialPattern($regex, $mode, 'camelcaselink');
    }

}
641
642//-------------------------------------------------------------------
/**
* InternalLink mode: text wrapped in [[ and ]].
*/
class Doku_Parser_Mode_InternalLink extends Doku_Parser_Mode {

    /**
    * Registers the link pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // Word boundaries?
        $regex = "\[\[[^\]]+?\]\]";
        $this->Lexer->addSpecialPattern($regex, $mode, 'internallink');
    }

}
651
652//-------------------------------------------------------------------
/**
* Media mode: text wrapped in {{ and }}.
*/
class Doku_Parser_Mode_Media extends Doku_Parser_Mode {

    /**
    * Registers the media pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // Word boundaries?
        $regex = "\{\{[^\}]+\}\}";
        $this->Lexer->addSpecialPattern($regex, $mode, 'media');
    }

}
661
662//-------------------------------------------------------------------
/**
* ExternalLink mode: URLs with a known scheme, plus bare
* "www." / "ftp." style host names.
*/
class Doku_Parser_Mode_ExternalLink extends Doku_Parser_Mode {
    // URL schemes a pattern is generated for
    var $schemes = array('http','https','telnet','gopher','wais','ftp','ed2k','irc');
    // Patterns built by preConnect()
    var $patterns = array();

    /**
    * Builds one pattern per scheme and two for scheme-less hosts.
    */
    function preConnect() {

        // Character-class fragments
        $ltrs = '\w';
        $gunk = '/\#~:.?+=&%@!\-';
        $punc = '.:?\-;,';
        $host = $ltrs.$punc;
        $any  = $ltrs.$gunk.$punc;

        // Common ending: lazy run of URL characters, optional punctuation,
        // then one character outside the URL set
        $tail = '['.$any.']+?['.$punc.']*[^'.$any.']';

        foreach ( $this->schemes as $scheme ) {
            $this->patterns[] = '\b(?i)'.$scheme.'(?-i)://'.$tail;
        }

        // Two dot-separated host sections after the prefix
        $hostPart = '\.['.$host.']+?\.['.$host.']+?';

        foreach ( array('www?', 'ftp?') as $prefix ) {
            $this->patterns[] = '\b(?i)'.$prefix.'(?-i)'.$hostPart.$tail;
        }

    }

    /**
    * Registers every built pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        foreach ( $this->patterns as $regex ) {
            $this->Lexer->addSpecialPattern($regex, $mode, 'externallink');
        }
    }

}
691
692//-------------------------------------------------------------------
/**
* FileLink mode: URLs using the file:// scheme.
*/
class Doku_Parser_Mode_FileLink extends Doku_Parser_Mode {

    // Pattern built by preConnect()
    var $pattern;

    /**
    * Builds the file:// pattern.
    */
    function preConnect() {

        // Character-class fragments
        $gunk = '/\#~:.?+=&%@!\-';
        $punc = '.:?\-;,';
        $any  = '\w'.$gunk.$punc;

        // Lazy run of URL characters, optional punctuation, then one
        // character outside the URL set
        $tail = '['.$any.']+?['.$punc.']*[^'.$any.']';

        $this->pattern = '\b(?i)file(?-i)://'.$tail;
    }

    /**
    * Registers the pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $regex = $this->pattern;
        $this->Lexer->addSpecialPattern($regex, $mode, 'filelink');
    }


}
716
717//-------------------------------------------------------------------
/**
* WindowsShareLink mode: \\host\path style links.
*/
class Doku_Parser_Mode_WindowsShareLink extends Doku_Parser_Mode {

    // Pattern built by preConnect()
    var $pattern;

    /**
    * Builds the share pattern.
    */
    function preConnect() {

        // Character-class fragments
        $ltrs = '\w';
        $gunk = '/\#~:.?+=&%@!\-';
        $punc = '.:?\-;,';
        $host = $ltrs.$punc;
        $any  = $ltrs.$gunk.$punc;

        // One leading gunk/punctuation/whitespace character
        $lead = '['.$gunk.$punc.'\s]';
        // Regex for two literal backslashes, then the host
        $server = '\\\\\\\\['.$host.']+?';
        // Regex for one literal backslash, then the path and its ending
        $path = '\\\\['.$any.']+?['.$punc.']*[^'.$any.']';

        $this->pattern = $lead.$server.$path;
    }

    /**
    * Registers the pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        $regex = $this->pattern;
        $this->Lexer->addSpecialPattern($regex, $mode, 'windowssharelink');
    }


}
740
741//-------------------------------------------------------------------
/**
* Email mode: an address wrapped in angle brackets.
*/
class Doku_Parser_Mode_Email extends Doku_Parser_Mode {

    /**
    * Registers the address pattern in the given mode.
    *
    * @param string $mode  name of the accepting mode
    */
    function connectTo($mode) {
        // Earlier form with capture groups, kept for reference:
        //<([\w0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)*[\w]+)>
        $regex = "<[\w0-9\-_.]+?@[\w\-]+\.[\w\-\.]+\.*[\w]+>";
        $this->Lexer->addSpecialPattern($regex, $mode, 'email');
    }

}
750
751//-------------------------------------------------------------------
// Helper functions that keep the mode lists - they make it easier to
// populate the list of modes another mode accepts
754
// Modes that can contain many other modes,
// e.g. a footnote can contain formatting etc.
function Doku_Parser_BlockContainers() {
    // hr breaks the principle, but HRs should not be used in
    // tables / lists, so it is listed here
    return array('footnote', 'listblock', 'table', 'quote', 'hr');
}
766
// Modes used to mark paragraph boundaries
function Doku_Parser_Paragraphs() {
    return array('eol');
}
774
// Modes that can only be used by the base mode
function Doku_Parser_BaseOnly() {
    return array('header');
}
782
// "Styling" modes that format text.
// The first entry equal to $remove, if any, is left out; the
// remaining entries keep their original numeric keys.
function Doku_Parser_Formatting($remove = '') {
    $modes = array(
        'strong', 'emphasis', 'underline', 'monospace',
        'subscript', 'superscript', 'deleted',
        );

    foreach ( $modes as $index => $name ) {
        if ( $remove == $name ) {
            unset($modes[$index]);
            break;
        }
    }

    return $modes;
}
796
// Modes where the token is simply replaced - they contain no
// other modes
function Doku_Parser_Substition() {
    return array(
        'acronym',
        'smiley',
        'wordblock',
        'entity',
        'camelcaselink',
        'internallink',
        'media',
        'externallink',
        'linebreak',
        'email',
        'windowssharelink',
        'filelink',
        'notoc',
        'multiplyentity',
        'quotes',
    );
}
809
// Modes which have a start and end token but inside which
// no other modes should be applied
function Doku_Parser_Protected() {
    return array(
        'preformatted', 'code', 'file',
        'php', 'html', 'quote',
    );
}
819
// Modes inside which wiki markup is disabled
function Doku_Parser_Disabled() {
    return array('unformatted');
}
827
828?>
829