xref: /dokuwiki/inc/parser/parser.php (revision 47f73ecf3ecd5d2a8e07b52f22a70ce2329d24bb)
10cecf9d5Sandi<?php
236dc94bbSAndreas Gohr
336dc94bbSAndreas Gohruse dokuwiki\ParserMode\Base;
436dc94bbSAndreas Gohruse dokuwiki\ParserMode\ModeInterface;
536dc94bbSAndreas Gohr
/**
 * Mode categories used by the parser.
 *
 * Each key names a category and maps to the list of mode names belonging to
 * it. The categories are used to populate the list of modes that another
 * mode accepts.
 */
global $PARSER_MODES;
$PARSER_MODES = array(

    // Containers are complex modes that can contain many other modes.
    // 'hr' breaks that principle, but horizontal rules shouldn't be used in
    // tables / lists, so it is listed here.
    'container' => array(
        'listblock', 'table', 'quote', 'hr',
    ),

    // Modes that are allowed inside the base mode only.
    'baseonly' => array(
        'header',
    ),

    // Modes for styling text -- footnote behaves similarly to styling.
    'formatting' => array(
        'strong', 'emphasis', 'underline', 'monospace',
        'subscript', 'superscript', 'deleted', 'footnote',
    ),

    // Modes where the token is simply replaced - they cannot contain any
    // other modes.
    // NOTE: the key is spelled 'substition' (sic). It is a runtime array key,
    // so the spelling must stay exactly as it is.
    'substition' => array(
        'acronym', 'smiley', 'wordblock', 'entity',
        'camelcaselink', 'internallink', 'media',
        'externallink', 'linebreak', 'emaillink',
        'windowssharelink', 'filelink', 'notoc',
        'nocache', 'multiplyentity', 'quotes', 'rss',
    ),

    // Modes which have a start and an end token but inside which no other
    // modes should be applied.
    'protected' => array(
        'preformatted', 'code', 'file', 'php', 'html', 'htmlblock', 'phpblock',
    ),

    // Inside this mode no wiki markup should be applied, but line endings
    // and whitespace aren't preserved.
    'disabled' => array(
        'unformatted',
    ),

    // Used to mark paragraph boundaries.
    'paragraphs' => array(
        'eol',
    ),
);
43ee20e7d1Sandi
/**
 * Sets up the Lexer with modes and points it to the Handler
 *
 * Modes are registered through addMode(). The first registration creates the
 * base mode and the lexer. parse() connects all registered modes with each
 * other (once) and then runs the lexer over the document.
 *
 * For an intro to the Lexer see: wiki:parser
 */
class Doku_Parser {

    /** @var Doku_Handler handler handed to the lexer; its calls are returned by parse() */
    protected $handler;

    /** @var Doku_Lexer $lexer created lazily by addBaseMode() */
    protected $lexer;

    /** @var ModeInterface[] $modes registered modes keyed by name, in registration order */
    protected $modes = array();

    /** @var bool mode connections may only be set up once */
    protected $connected = false;

    /**
     * Doku_Parser constructor.
     *
     * @param Doku_Handler $handler
     */
    public function __construct(Doku_Handler $handler) {
        $this->handler = $handler;
    }

    /**
     * Adds the base mode and initializes the lexer
     *
     * The lexer is only created if none exists yet; the (possibly already
     * existing) lexer is then assigned to the base mode.
     *
     * @param Base $BaseMode
     */
    protected function addBaseMode($BaseMode) {
        $this->modes['base'] = $BaseMode;
        if ( !$this->lexer ) {
            $this->lexer = new Doku_Lexer($this->handler, 'base', true);
        }
        $this->modes['base']->Lexer = $this->lexer;
    }

    /**
     * Add a new syntax element (mode) to the parser
     *
     * PHP preserves order of associative elements
     * Mode sequence is important
     *
     * If no base mode has been registered yet, one is created first so that
     * the lexer exists before it is assigned to the new mode.
     *
     * @param string $name
     * @param ModeInterface $Mode
     */
    public function addMode($name, ModeInterface $Mode) {
        if ( !isset($this->modes['base']) ) {
            $this->addBaseMode(new Base());
        }
        $Mode->Lexer = $this->lexer; // FIXME should be done by setter
        $this->modes[$name] = $Mode;
    }

    /**
     * Connect all modes with each other
     *
     * This is the last step before actually parsing. The connections are
     * only set up on the first call; later calls return immediately.
     */
    protected function connectModes() {

        if ( $this->connected ) {
            return;
        }

        foreach ( array_keys($this->modes) as $mode ) {
            // Base isn't connected to anything.
            // The comparison must be strict: PHP stores numeric mode names as
            // integer keys, and the loose 0 == 'base' is true before PHP 8,
            // which would wrongly skip such a mode.
            if ( $mode === 'base' ) {
                continue;
            }
            $this->modes[$mode]->preConnect();

            // connect this mode to every mode (including base) that accepts it
            foreach ( array_keys($this->modes) as $cm ) {

                if ( $this->modes[$cm]->accepts($mode) ) {
                    $this->modes[$mode]->connectTo($cm);
                }

            }

            $this->modes[$mode]->postConnect();
        }

        $this->connected = true;
    }

    /**
     * Parses wiki syntax to instructions
     *
     * @param string $doc the wiki syntax text
     * @return array instructions
     */
    public function parse($doc) {
        $this->connectModes();
        // Convert CRLF line endings to LF and pad the document with a
        // newline at both ends
        $doc = "\n".str_replace("\r\n","\n",$doc)."\n";
        $this->lexer->parse($doc);
        $this->handler->_finalize();
        return $this->handler->calls;
    }

}
149