1<?php
2/**
3 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
4 * For an intro to the Lexer see:
5 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
6 *
7 * @author Marcus Baker http://www.lastcraft.com
8 */
9
10namespace dokuwiki\Parsing\Lexer;
11
/**
 * Compounded regular expression.
 *
 * Any of the contained patterns could match and when one does, its label is returned.
 *
 * The patterns handed to addPattern() are stored untouched. Their escaped,
 * group-wrapped counterparts are rebuilt from those originals every time the
 * compound regex has to be regenerated, so patterns may be added at any time,
 * even after a match has already been attempted.
 */
class ParallelRegex
{
    /** @var string[] patterns to match, exactly as passed to addPattern() */
    protected $patterns = array();
    /** @var string[] escaped patterns, each wrapped in one capturing group; indexed like $patterns */
    protected $compiled = array();
    /** @var array labels (bool|string) for above patterns */
    protected $labels = array();
    /** @var string|null the compound regex matching all patterns, null when it has to be rebuilt */
    protected $regex = null;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case    True for case sensitive, false
     *                         for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
        $this->patterns = array();
        $this->compiled = array();
        $this->labels = array();
        $this->regex = null;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * Invalidates the cached compound regex; it is rebuilt on the next match.
     *
     * @param mixed       $pattern Perl style regex. Must be UTF-8
     *                             encoded. If it's a string, the (, )
     *                             lose their meaning unless they
     *                             form part of a lookahead or
     *                             lookbehind assertion.
     * @param bool|string $label   Label of regex to be returned
     *                             on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $count = count($this->patterns);
        $this->patterns[$count] = $pattern;
        $this->labels[$count] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject      String to match against.
     * @param string $match        First matched portion of
     *                             subject. Empty string if nothing matched.
     * @return bool|string         False if no match found, label if label exists, true if not
     */
    public function apply($subject, &$match)
    {
        // always leave the output parameter in a defined state
        $match = "";

        if (count($this->patterns) == 0) {
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        // FIXME this could be made faster by storing the labels as keys in a hashmap
        for ($i = 1; $i < $size; $i++) {
            // Group $i belongs to pattern $i - 1. Compare against '' explicitly:
            // a plain truthiness check would skip a legitimate match of "0".
            if ($matches[$i] !== '' && isset($this->labels[$i - 1])) {
                return $this->labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * @param string $subject      String to match against.
     * @param array $split         The split result: array containing, pre-match, match & post-match strings.
     *                             When nothing matched it is array($subject, "", "").
     * @return bool|string         False if no match found, label if label exists, true if not
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        // always leave the output parameter in a defined state
        $split = array($subject, "", "");

        if (count($this->patterns) == 0) {
            return false;
        }

        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            // a "no match" result may actually be a PCRE failure, tell the user about it
            $this->reportPregError(preg_last_error());
            return false;
        }

        // every pattern contributes exactly one capturing group and preg_match()
        // does not report trailing unmatched groups, so the last entry of
        // $matches belongs to the pattern that matched
        $idx = count($matches) - 2;

        // the compiled pattern is wrapped in ( ), which doubles as the regex delimiter pair here
        list($pre, $post) = preg_split($this->compiled[$idx] . $this->getPerlMatchingFlags(), $subject, 2);
        $split = array($pre, $matches[0], $post);

        return isset($this->labels[$idx]) ? $this->labels[$idx] : true;
    }

    /**
     * Shows a message for the PCRE errors that are known to happen in practice.
     *
     * Does nothing for any other error code, including PREG_NO_ERROR.
     *
     * @param int $err error code as returned by preg_last_error()
     */
    protected function reportPregError($err)
    {
        switch ($err) {
            case PREG_BACKTRACK_LIMIT_ERROR:
                msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                break;
            case PREG_RECURSION_LIMIT_ERROR:
                msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                break;
            case PREG_BAD_UTF8_ERROR:
                msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                break;
            case PREG_INTERNAL_ERROR:
                msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                break;
        }
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The escaped patterns are always derived from the untouched originals in
     * $patterns, so rebuilding after addPattern() never escapes a pattern twice.
     *
     * @return null|string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $this->compiled = array();
            foreach ($this->patterns as $i => $pattern) {
                $this->compiled[$i] = '(' . $this->escapePattern($pattern) . ')';
            }
            $this->regex = "/" . implode("|", $this->compiled) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single pattern so it can be embedded in the compound regex.
     *
     * Plain "(" and ")" are turned into literals, "(?" groups and their closing
     * ")" are kept, character classes and backslash escapes are passed through
     * and "/" is escaped because it is the delimiter of the compound regex.
     *
     * @param string $pattern pattern as passed to addPattern()
     * @return string the escaped pattern, not yet wrapped in a capturing group
     */
    protected function escapePattern($pattern)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all('/\\\\.|' .
                       '\(\?|' .
                       '[()]|' .
                       '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
                       '[^[()\\\\]+/', $pattern, $elts);

        $escaped = "";
        $level = 0; // number of "(?" groups that are still open

        foreach ($elts[0] as $elt) {
            /*
             * for "(", ")" remember the nesting level, add "\"
             * only to the non-"(?" ones.
             */
            switch ($elt) {
                case '(':
                    $escaped .= '\(';
                    break;
                case ')':
                    if ($level > 0) {
                        $level--; /* closing (? */
                    } else {
                        $escaped .= '\\';
                    }
                    $escaped .= ')';
                    break;
                case '(?':
                    $level++;
                    $escaped .= '(?';
                    break;
                default:
                    if (substr($elt, 0, 1) == '\\') {
                        // backslash escape sequence, keep as is
                        $escaped .= $elt;
                    } else {
                        $escaped .= str_replace('/', '\/', $elt);
                    }
            }
        }

        return $escaped;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string       Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}
204