<?php

/**
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 * For an intro to the Lexer see:
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 *
 * @author Marcus Baker http://www.lastcraft.com
 */

namespace dokuwiki\Parsing\Lexer;

/**
 * Compounded regular expression.
 *
 * Any of the contained patterns could match and when one does its label is returned.
 */
class ParallelRegex
{
    /** @var string[] patterns to match, exactly as they were passed to addPattern() */
    protected $patterns = [];
    /** @var string[] escaped patterns, each wrapped in one capture group; rebuilt with the compound regex */
    protected $compiled = [];
    /** @var string[] labels for above patterns */
    protected $labels = [];
    /** @var string|null the compound regex matching all patterns, null when it has to be rebuilt */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case True for case sensitive, false
     *                      for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * Adding a pattern invalidates the cached compound regex, it is rebuilt
     * on the next call to apply() or split().
     *
     * @param mixed $pattern Perl style regex. Must be UTF-8
     *                       encoded. If it's a string, the (, )
     *                       lose their meaning unless they
     *                       form part of a lookahead or
     *                       lookbehind assertion.
     * @param bool|string $label Label of regex to be returned
     *                           on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $this->patterns[] = $pattern;
        // keep the label on the same index as the pattern it belongs to
        $this->labels[count($this->patterns) - 1] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * When no patterns have been added, $match is left untouched.
     *
     * @param string $subject String to match against.
     * @param string $match First matched portion of
     *                      subject.
     * @return bool|string False if no match found, label if label exists, true if not
     */
    public function apply($subject, &$match)
    {
        if (count($this->patterns) == 0) {
            return false;
        }
        if (!preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            $match = "";
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        // FIXME this could be made faster by storing the labels as keys in a hashmap
        for ($i = 1; $i < $size; $i++) {
            // groups of alternatives that did not match are empty strings; compare
            // strictly so that a matched "0" is not mistaken for an empty group
            if ($matches[$i] !== '' && isset($this->labels[$i - 1])) {
                return $this->labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * When no patterns have been added, $split is left untouched.
     *
     * @param string $subject String to match against.
     * @param array $split The split result: array containing, pre-match, match & post-match strings
     * @return bool|string False if nothing matched, otherwise the label of the
     *                     matching pattern (true if it has no label)
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            return false;
        }

        if (!preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            $this->reportPregError();

            $split = [$subject, "", ""];
            return false;
        }

        // preg_match() drops trailing groups that did not take part in the match,
        // so the last entry belongs to the alternative that matched
        // (-1 for the full match at index 0, -1 for the zero based index)
        $idx = count($matches) - 2;

        // the compiled pattern is wrapped in "(...)" which doubles as a
        // bracket style delimiter pair here
        [$pre, $post] = preg_split($this->compiled[$idx] . $this->getPerlMatchingFlags(), $subject, 2);
        $split = [$pre, $matches[0], $post];

        return $this->labels[$idx] ?? true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The escaped patterns are always derived from the raw patterns, so
     * rebuilding the regex after another addPattern() call never escapes a
     * pattern twice.
     *
     * @return null|string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $this->compiled = [];
            foreach ($this->patterns as $idx => $pattern) {
                $this->compiled[$idx] = '(' . $this->escapePattern($pattern) . ')';
            }
            $this->regex = "/" . implode("|", $this->compiled) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single pattern for use inside the compound regex
     *
     * Plain parentheses are turned into literals, "(?" constructs and their
     * closing parenthesis are kept, "/" is escaped outside of backslash sequences.
     *
     * @param string $raw the pattern as passed to addPattern()
     * @return string the escaped pattern, not yet wrapped in a capture group
     */
    protected function escapePattern($raw)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all('/\\\\.|' .
            '\(\?|' .
            '[()]|' .
            '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
            '[^[()\\\\]+/', $raw, $elts);

        $pattern = "";
        $level = 0; // number of currently open "(?" constructs

        foreach ($elts[0] as $elt) {
            /*
             * for "(", ")" remember the nesting level, add "\"
             * only to the non-"(?" ones.
             */
            switch ($elt) {
                case '(':
                    $pattern .= '\(';
                    break;
                case ')':
                    if ($level > 0) {
                        $level--; /* closing (? */
                    } else {
                        $pattern .= '\\';
                    }
                    $pattern .= ')';
                    break;
                case '(?':
                    $level++;
                    $pattern .= '(?';
                    break;
                default:
                    if (str_starts_with($elt, '\\')) {
                        // backslash sequences are passed through unchanged
                        $pattern .= $elt;
                    } else {
                        $pattern .= str_replace('/', '\/', $elt);
                    }
            }
        }

        return $pattern;
    }

    /**
     * Shows a message when the last preg call failed with a known PCRE error
     *
     * Does nothing for any other value of preg_last_error().
     */
    protected function reportPregError()
    {
        switch (preg_last_error()) {
            case PREG_BACKTRACK_LIMIT_ERROR:
                msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                break;
            case PREG_RECURSION_LIMIT_ERROR:
                msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                break;
            case PREG_BAD_UTF8_ERROR:
                msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                break;
            case PREG_INTERNAL_ERROR:
                msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                break;
        }
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}