1// 2// Hoa 3// 4// 5// @license 6// 7// New BSD License 8// 9// Copyright © 2007-2017, Hoa community. All rights reserved. 10// 11// Redistribution and use in source and binary forms, with or without 12// modification, are permitted provided that the following conditions are met: 13// * Redistributions of source code must retain the above copyright 14// notice, this list of conditions and the following disclaimer. 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// * Neither the name of the Hoa nor the names of its contributors may be 19// used to endorse or promote products derived from this software without 20// specific prior written permission. 21// 22// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 23// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE 26// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32// POSSIBILITY OF SUCH DAMAGE. 33// 34// Grammar \Hoa\Regex\Grammar. 35// 36// Provide the grammar of PCRE (Perl Compatible Regular Expressions) for the LL(k) 37// parser. More information at http://pcre.org/pcre.txt, sections pcrepattern & 38// pcresyntax. 39// 40// @copyright Copyright © 2007-2017 Hoa community. 41// @license New BSD License 42// 43 44 45// Skip. 
// Lexer declarations.
//
// Notation used below (Hoa PP format): `%skip name regex` declares text to
// discard, `%token [ns:]name regex [-> next_ns]` declares a token that is
// only active in namespace `ns` and optionally switches to `next_ns` once
// matched. Longer alternatives are declared before the shorter tokens they
// start with (e.g. `\[\^` before `\[`), so the declaration order must be
// kept as is.

// Newlines are ignored.
%skip   nl                       \n

// Character class delimiters and the range separator.
%token  negative_class_          \[\^
%token  class_                   \[
%token  _class                   \]
%token  range                    \-

// Inline option setting, one letter among i, m, s, x with an optional sign.
%token  internal_option          \(\?[\-+]?[imsx]\)

// Openers of lookaround assertions.
%token  lookahead_               \(\?=
%token  negative_lookahead_      \(\?!
%token  lookbehind_              \(\?<=
%token  negative_lookbehind_     \(\?<!

// Openers of conditional groups. The reference forms switch namespace so
// that the name (namespace `nc`) or the index (namespace `c`) is read next.
%token  named_reference_         \(\?\(<                 -> nc
%token  absolute_reference_      \(\?\((?=\d)            -> c
%token  relative_reference_      \(\?\((?=[\+\-])        -> c
%token  c:index                  [\+\-]?\d+              -> default
%token  assertion_reference_     \(\?\(

// Inline comments: everything up to the first unescaped closing parenthesis.
%token  comment_                 \(\?#                   -> co
%token  co:_comment              \)                      -> default
%token  co:comment               .*?(?=(?<!\\)\))

// Group openers and the shared group closer. A named group reads its name
// in namespace `nc`, up to the first unescaped `>`.
%token  named_capturing_         \(\?<                   -> nc
%token  nc:_named_capturing      >                       -> default
%token  nc:capturing_name        .+?(?=(?<!\\)>)
%token  non_capturing_           \(\?:
%token  non_capturing_reset_     \(\?\|
%token  atomic_group_            \(\?>
%token  capturing_               \(
%token  _capturing               \)

// Quantifiers. For each shape the possessive (`+` suffix) and lazy (`?`
// suffix) variants come before the plain one.
%token  zero_or_one_possessive   \?\+
%token  zero_or_one_lazy         \?\?
%token  zero_or_one              \?
%token  zero_or_more_possessive  \*\+
%token  zero_or_more_lazy        \*\?
%token  zero_or_more             \*
%token  one_or_more_possessive   \+\+
%token  one_or_more_lazy         \+\?
%token  one_or_more              \+
%token  exactly_n                \{[0-9]+\}
%token  n_to_m_possessive        \{[0-9]+,[0-9]+\}\+
%token  n_to_m_lazy              \{[0-9]+,[0-9]+\}\?
%token  n_to_m                   \{[0-9]+,[0-9]+\}
%token  n_or_more_possessive     \{[0-9]+,\}\+
%token  n_or_more_lazy           \{[0-9]+,\}\?
%token  n_or_more                \{[0-9]+,\}

// Branch separator.
%token  alternation              \|

// Literals: single-character escapes, then octal/hexadecimal escapes.
%token  character                \\([aefnrt]|c[\x00-\x7f])
%token  dynamic_character        \\([0-7]{3}|x[0-9a-zA-Z]{2}|x{[0-9a-zA-Z]+})
// For \p{} and \P{}, see PCRESYNTAX(3): General Category properties, PCRE
// special category properties and script names.
%token  character_type           \\([CdDhHNRsSvVwWX]|[pP]{[^}]+})
// Simple assertions: a backslash followed by ONE of b, B, A, Z, z, G, or a
// bare ^ / $. This must be a character class: the former group `(bBAZzG)`
// only matched the literal text `\bBAZzG`, so `\b`, `\A`, … were lexed as
// `literal` instead of `anchor`.
%token  anchor                   \\[bBAZzG]|\^|\$
%token  match_point_reset        \\K
// Catch-all, declared last: any escaped character, or any single character.
%token  literal                  \\.|.


// Rules.
//
// Notation (Hoa PP format): `rule()` invokes another rule, `<token>` consumes
// a token, `::token::` consumes a token without keeping it, `#name` names the
// node built for the alternative or rule it is attached to.

// Entry point: a whole pattern is an alternation.
#expression:
    alternation()

// One or more concatenations separated by `|`.
alternation:
    concatenation() ( ::alternation:: concatenation() #alternation )*

// A sequence of one or more items; the #concatenation node is only attached
// when there are at least two.
concatenation:
    (   internal_options() | assertion() | quantification() | condition() )
    ( ( internal_options() | assertion() | quantification() | condition() ) #concatenation )*

#internal_options:
    <internal_option>

// Conditional group: a named, relative, absolute or assertion condition,
// closed by `)`, followed by an optional "then" branch, an optional
// `|` "else" branch, and the closing `)` of the group.
#condition:
    (
        ::named_reference_:: <capturing_name> ::_named_capturing:: #namedcondition
      | (
            ::relative_reference_:: #relativecondition
          | ::absolute_reference_:: #absolutecondition
        )
        <index>
      | ::assertion_reference_:: alternation() #assertioncondition
    )
    ::_capturing:: concatenation()?
    ( ::alternation:: concatenation()? )?
    ::_capturing::

// Lookahead / lookbehind, positive or negative, wrapping an alternation.
assertion:
    (
        ::lookahead_::           #lookahead
      | ::negative_lookahead_::  #negativelookahead
      | ::lookbehind_::          #lookbehind
      | ::negative_lookbehind_:: #negativelookbehind
    )
    alternation() ::_capturing::

// A class or a simple item, optionally followed by one quantifier.
quantification:
    ( class() | simple() ) ( quantifier() #quantification )?

quantifier:
    <zero_or_one_possessive>  | <zero_or_one_lazy>  | <zero_or_one>
  | <zero_or_more_possessive> | <zero_or_more_lazy> | <zero_or_more>
  | <one_or_more_possessive>  | <one_or_more_lazy>  | <one_or_more>
  | <exactly_n>
  | <n_to_m_possessive>       | <n_to_m_lazy>       | <n_to_m>
  | <n_or_more_possessive>    | <n_or_more_lazy>    | <n_or_more>

// Character class, negated or not, holding at least one range or literal.
#class:
    (
        ::negative_class_:: #negativeclass
      | ::class_::
    )
    ( range() | literal() )+
    ::_class::

#range:
    literal() ::range:: literal()

simple:
    capturing()
  | literal()

// Either an inline comment, or one of the group forms wrapping an
// alternation. A plain `(` group gets no dedicated node name here.
capturing:
    ::comment_:: <comment>? ::_comment:: #comment
  | (
        ::named_capturing_:: <capturing_name> ::_named_capturing:: #namedcapturing
      | ::non_capturing_:: #noncapturing
      | ::non_capturing_reset_:: #noncapturingreset
      | ::atomic_group_:: #atomicgroup
      | ::capturing_::
    )
    alternation() ::_capturing::

// Any single literal-like token.
literal:
    <character>
  | <dynamic_character>
  | <character_type>
  | <anchor>
  | <match_point_reset>
  | <literal>