<?php

/**
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 * For an intro to the Lexer see:
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 *
 * @author Marcus Baker http://www.lastcraft.com
 */

namespace dokuwiki\Parsing\Lexer;

/**
 * Compounded regular expression.
 *
 * Any of the contained patterns could match and when one does its label is returned.
 */
class ParallelRegex
{
    /** @var string[] patterns to match, exactly as they were passed to addPattern() */
    protected $patterns = [];
    /** @var string[] escaped and parenthesised form of each pattern, rebuilt together with $regex */
    protected $compiled = [];
    /** @var array labels for above patterns (string label or true) */
    protected $labels = [];
    /** @var string|null the compound regex matching all patterns, null when it needs rebuilding */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param bool $case True for case sensitive, false
     *                   for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * Invalidates the cached compound regex so it is rebuilt on the next split().
     *
     * @param mixed       $pattern Perl style regex. Must be UTF-8
     *                             encoded. If its a string, the (, )
     *                             lose their meaning unless they
     *                             form part of a lookahead or
     *                             lookbehind assertion.
     * @param bool|string $label   Label of regex to be returned
     *                             on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $count = count($this->patterns);
        $this->patterns[$count] = $pattern;
        $this->labels[$count] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * @param string $subject String to match against.
     * @param array  $split   The split result: array containing, pre-match, match & post-match strings.
     *                        Left untouched when no patterns are registered; set to
     *                        [$subject, "", ""] when nothing matched.
     * @return bool|string    The label of the matching pattern (true when the pattern
     *                        was added without a label), false when nothing matched.
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            return false;
        }

        if (!preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            // both "no match" (0) and "error" (false) end up here; only real errors are reported
            $this->reportPcreError(preg_last_error());

            $split = [$subject, "", ""];
            return false;
        }

        // every pattern is exactly one capture group and PCRE drops trailing unmatched
        // groups, so the last entry in $matches belongs to the pattern that matched
        $idx = count($matches) - 2;

        // the compiled pattern is wrapped in "(" ... ")" which double as the regex delimiters here
        [$pre, $post] = preg_split($this->compiled[$idx] . $this->getPerlMatchingFlags(), $subject, 2);
        $split = [$pre, $matches[0], $post];

        return $this->labels[$idx] ?? true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The registered patterns themselves are left untouched; the escaped versions
     * are kept in $compiled. This way a rebuild after a later addPattern() call
     * does not escape already processed patterns a second time.
     *
     * @return null|string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $this->compiled = [];
            foreach ($this->patterns as $i => $pattern) {
                $this->compiled[$i] = '(' . $this->escapePattern($pattern) . ')';
            }
            $this->regex = "/" . implode("|", $this->compiled) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single pattern so it can be embedded in the compound regex.
     *
     * Plain "(" and ")" are turned into literals, "(?" groups and their closing
     * ")" are kept, and "/" is escaped outside of backslash sequences.
     *
     * @param mixed $pattern pattern as passed to addPattern()
     * @return string the escaped pattern, without the surrounding capture group
     */
    protected function escapePattern($pattern)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all('/\\\\.|' .
            '\(\?|' .
            '[()]|' .
            '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
            '[^[()\\\\]+/', $pattern, $elts);

        $escaped = "";
        $level = 0; // number of currently open "(?" groups

        foreach ($elts[0] as $elt) {
            /*
             * for "(", ")" remember the nesting level, add "\"
             * only to the non-"(?" ones.
             */
            switch ($elt) {
                case '(':
                    $escaped .= '\(';
                    break;
                case ')':
                    if ($level > 0) {
                        $level--; /* closing (? */
                    } else {
                        $escaped .= '\\';
                    }
                    $escaped .= ')';
                    break;
                case '(?':
                    $level++;
                    $escaped .= '(?';
                    break;
                default:
                    if (str_starts_with($elt, '\\')) {
                        // backslash sequences are passed through unchanged
                        $escaped .= $elt;
                    } else {
                        $escaped .= str_replace('/', '\/', $elt);
                    }
            }
        }

        return $escaped;
    }

    /**
     * Reports a PCRE error code through msg(). Codes not listed here,
     * including PREG_NO_ERROR, produce no message.
     *
     * @param int $err error code as returned by preg_last_error()
     */
    protected function reportPcreError($err)
    {
        switch ($err) {
            case PREG_BACKTRACK_LIMIT_ERROR:
                msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                break;
            case PREG_RECURSION_LIMIT_ERROR:
                msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                break;
            case PREG_BAD_UTF8_ERROR:
                msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                break;
            case PREG_INTERNAL_ERROR:
                msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                break;
        }
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}