<?php
/**
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 * For an intro to the Lexer see:
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 *
 * @author Marcus Baker http://www.lastcraft.com
 */

namespace dokuwiki\Parsing\Lexer;

/**
 * Compounded regular expression.
 *
 * Any of the contained patterns could match and when one does its label is returned.
 */
class ParallelRegex
{
    /** @var string[] raw patterns exactly as passed to addPattern() */
    protected $patterns;
    /** @var string[] labels for above patterns (same indexes) */
    protected $labels;
    /** @var string|null the compound regex matching all patterns, null until (re)built */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;
    /** @var string[] escaped patterns wrapped in "(...)", same indexes as $patterns */
    protected $compiled = array();

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case True for case sensitive, false
     *                      for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
        $this->patterns = array();
        $this->labels = array();
        $this->compiled = array();
        $this->regex = null;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * Invalidates the cached compound regex so it is rebuilt on next use.
     *
     * @param mixed $pattern Perl style regex. Must be UTF-8
     *                       encoded. If it is a string, the (, )
     *                       lose their meaning unless they
     *                       form part of a lookahead or
     *                       lookbehind assertion.
     * @param bool|string $label Label of regex to be returned
     *                           on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $count = count($this->patterns);
        $this->patterns[$count] = $pattern;
        $this->labels[$count] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject String to match against.
     * @param string $match First matched portion of
     *                      subject.
     * @return bool|string False if no match found, label if label exists, true if not
     */
    public function apply($subject, &$match)
    {
        if (count($this->patterns) == 0) {
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            $match = "";
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        // FIXME this could be made faster by storing the labels as keys in a hashmap
        for ($i = 1; $i < $size; $i++) {
            // Groups that did not take part in the match are reported as ''.
            // Compare against '' explicitly: a truthiness test would also
            // reject the perfectly valid matched text "0".
            if ($matches[$i] !== '' && isset($this->labels[$i - 1])) {
                return $this->labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * When matching fails because of a PCRE error, a message is emitted
     * through msg() (defined outside this file).
     *
     * @param string $subject String to match against.
     * @param array $split The split result: array containing, pre-match, match & post-match strings
     * @return bool|string False if nothing matched, otherwise the label of the
     *                     matching pattern, or true if that pattern has no label
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            return false;
        }

        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            if (function_exists('preg_last_error')) {
                $err = preg_last_error();
                switch ($err) {
                    case PREG_BACKTRACK_LIMIT_ERROR:
                        msg('A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini', -1);
                        break;
                    case PREG_RECURSION_LIMIT_ERROR:
                        msg('A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini', -1);
                        break;
                    case PREG_BAD_UTF8_ERROR:
                        msg('A PCRE UTF-8 error occured. This might be caused by a faulty plugin', -1);
                        break;
                    case PREG_INTERNAL_ERROR:
                        msg('A PCRE internal error occured. This might be caused by a faulty plugin', -1);
                        break;
                }
            }

            $split = array($subject, "", "");
            return false;
        }

        // preg_match() drops trailing groups that did not participate, so the
        // last entry of $matches belongs to the pattern that matched.
        // $matches[0] is the full match, hence the offset of 2.
        $idx = count($matches) - 2;
        // The compiled fragment is "(...)": the parentheses double as regex
        // delimiters here, so only the flags need to be appended.
        list($pre, $post) = preg_split($this->compiled[$idx] . $this->getPerlMatchingFlags(), $subject, 2);
        $split = array($pre, $matches[0], $post);

        return isset($this->labels[$idx]) ? $this->labels[$idx] : true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The raw patterns are left untouched; the escaped fragments are kept in
     * $compiled, so rebuilding after a later addPattern() never escapes a
     * pattern twice.
     *
     * @return string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $this->compiled = array();
            $cnt = count($this->patterns);
            for ($i = 0; $i < $cnt; $i++) {
                $this->compiled[$i] = '(' . $this->escapePattern($this->patterns[$i]) . ')';
            }
            $this->regex = "/" . implode("|", $this->compiled) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Escapes a single raw pattern for use inside the compound regex.
     *
     * Plain "(" and ")" are turned into literals, "(?...)" constructs are
     * kept intact, and "/" is escaped outside of backslash sequences.
     *
     * @param string $raw pattern as given to addPattern()
     * @return string escaped pattern, not yet wrapped in a capture group
     */
    protected function escapePattern($raw)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all('/\\\\.|' .
                       '\(\?|' .
                       '[()]|' .
                       '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
                       '[^[()\\\\]+/', $raw, $elts);

        $pattern = "";
        $level = 0;

        foreach ($elts[0] as $elt) {
            /*
             * for "(", ")" remember the nesting level, add "\"
             * only to the non-"(?" ones.
             */
            switch ($elt) {
                case '(':
                    $pattern .= '\(';
                    break;
                case ')':
                    if ($level > 0) {
                        $level--; /* closing (? */
                    } else {
                        $pattern .= '\\';
                    }
                    $pattern .= ')';
                    break;
                case '(?':
                    $level++;
                    $pattern .= '(?';
                    break;
                default:
                    if (substr($elt, 0, 1) == '\\') {
                        // backslash sequence: pass through unchanged
                        $pattern .= $elt;
                    } else {
                        // "/" is the delimiter of the compound regex
                        $pattern .= str_replace('/', '\/', $elt);
                    }
            }
        }

        return $pattern;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}