<?php

/**
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 * For an intro to the Lexer see:
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 *
 * @author Marcus Baker http://www.lastcraft.com
 */

namespace dokuwiki\Parsing\Lexer;

/**
 * Compounded regular expression.
 *
 * Any of the contained patterns could match and when one does its label is returned.
 */
class ParallelRegex
{
    /** @var string[] patterns to match, exactly as handed to addPattern() */
    protected $patterns = [];
    /** @var string[] escaped and parenthesised form of each pattern, rebuilt together with $regex */
    protected $compiled = [];
    /** @var (bool|string)[] labels for above patterns */
    protected $labels = [];
    /** @var string|null the compound regex matching all patterns, null while it needs (re)building */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case True for case sensitive, false
     *                      for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * Adding a pattern invalidates the cached compound regex; it is rebuilt
     * from the untouched source patterns on the next apply()/split() call.
     *
     * @param mixed       $pattern Perl style regex. Must be UTF-8
     *                             encoded. If its a string, the (, )
     *                             lose their meaning unless they
     *                             form part of a lookahead or
     *                             lookbehind assertion.
     * @param bool|string $label   Label of regex to be returned
     *                             on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $count = count($this->patterns);
        $this->patterns[$count] = $pattern;
        $this->labels[$count] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * When no patterns are registered the method returns false without
     * touching $match.
     *
     * @param string $subject String to match against.
     * @param string $match   First matched portion of
     *                        subject.
     * @return bool|string False if no match found, label if label exists, true if not
     */
    public function apply($subject, &$match)
    {
        if (count($this->patterns) == 0) {
            return false;
        }
        if (!preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            $match = "";
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        // FIXME this could be made faster by storing the labels as keys in a hashmap
        for ($i = 1; $i < $size; $i++) {
            // compare strictly against '': a captured "0" is falsy in PHP but is still a real match
            if ($matches[$i] !== '' && isset($this->labels[$i - 1])) {
                return $this->labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * When no patterns are registered the method returns false without
     * touching $split.
     *
     * @param string $subject String to match against.
     * @param array  $split   The split result: array containing, pre-match, match & post-match strings
     * @return bool|string False if nothing matched, otherwise the label of the matching pattern
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            return false;
        }

        if (!preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            // distinguish "no match" from a PCRE failure and tell the user about the latter
            switch (preg_last_error()) {
                case PREG_BACKTRACK_LIMIT_ERROR:
                    msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                    break;
                case PREG_RECURSION_LIMIT_ERROR:
                    msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                    break;
                case PREG_BAD_UTF8_ERROR:
                    msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                    break;
                case PREG_INTERNAL_ERROR:
                    msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                    break;
            }

            $split = [$subject, "", ""];
            return false;
        }

        // preg_match() drops trailing unmatched groups, so the last entry belongs to the
        // alternative that matched; subtract the full match (index 0) and go zero-based
        $idx = count($matches) - 2;

        // Split on the winning pattern alone. The compiled pattern already has "/" escaped, so
        // it is wrapped in "/" delimiters; using its own parentheses as bracket-style delimiters
        // breaks on patterns such as "[)]" because the delimiter scan ignores character classes.
        [$pre, $post] = preg_split(
            '/' . $this->compiled[$idx] . '/' . $this->getPerlMatchingFlags(),
            $subject,
            2
        );
        $split = [$pre, $matches[0], $post];

        return $this->labels[$idx] ?? true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The source patterns in $patterns are left untouched, so the compound
     * regex can safely be rebuilt after further addPattern() calls.
     *
     * @return string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $this->compiled = [];
            foreach ($this->patterns as $i => $pattern) {
                $this->compiled[$i] = $this->compilePattern($pattern);
            }
            $this->regex = "/" . implode("|", $this->compiled) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single pattern and wraps it in a capturing group.
     *
     * Plain "(" and ")" are escaped so they match literally, "(?" constructs
     * and their closing ")" are kept, and "/" is escaped outside of
     * backslash sequences.
     *
     * @param string $pattern pattern as passed to addPattern()
     * @return string the escaped pattern enclosed in parentheses
     */
    protected function compilePattern($pattern)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all(
            '/\\\\.|' .
            '\(\?|' .
            '[()]|' .
            '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
            '[^[()\\\\]+/',
            $pattern,
            $elts
        );

        $escaped = "";
        $level = 0;

        foreach ($elts[0] as $elt) {
            /*
             * for "(", ")" remember the nesting level, add "\"
             * only to the non-"(?" ones.
             */
            switch ($elt) {
                case '(':
                    $escaped .= '\(';
                    break;
                case ')':
                    if ($level > 0) {
                        $level--; /* closing (? */
                    } else {
                        $escaped .= '\\';
                    }
                    $escaped .= ')';
                    break;
                case '(?':
                    $level++;
                    $escaped .= '(?';
                    break;
                default:
                    if (substr($elt, 0, 1) == '\\') {
                        // backslash sequences are passed through unchanged
                        $escaped .= $elt;
                    } else {
                        // the compound regex is "/"-delimited, so bare slashes need escaping
                        $escaped .= str_replace('/', '\/', $elt);
                    }
            }
        }

        return "($escaped)";
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}