1//
2// Hoa
3//
4//
5// @license
6//
7// New BSD License
8//
9// Copyright © 2007-2017, Hoa community. All rights reserved.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are met:
13//     * Redistributions of source code must retain the above copyright
14//       notice, this list of conditions and the following disclaimer.
15//     * Redistributions in binary form must reproduce the above copyright
16//       notice, this list of conditions and the following disclaimer in the
17//       documentation and/or other materials provided with the distribution.
18//     * Neither the name of the Hoa nor the names of its contributors may be
19//       used to endorse or promote products derived from this software without
20//       specific prior written permission.
21//
22// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
26// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32// POSSIBILITY OF SUCH DAMAGE.
33//
34// Grammar \Hoa\Regex\Grammar.
35//
36// Provide the grammar of PCRE (Perl Compatible Regular Expressions) for the
37// LL(k) parser. More information at http://pcre.org/pcre.txt, sections
38// pcrepattern & pcresyntax.
39//
40// @copyright  Copyright © 2007-2017 Hoa community.
41// @license    New BSD License
42//
43
44
45// Skip.
// `%skip` tokens are consumed by the lexer and never handed to the rules, so a
// newline inside a pattern is silently dropped.
46%skip   nl                       \n
47
48// Character classes.
// `negative_class_` (`[^`) is declared before `class_` (`[`) because the latter
// is a prefix of the former; this relies on the lexer trying tokens in
// declaration order (see the Hoa\Compiler documentation).
49%token  negative_class_          \[\^
50%token  class_                   \[
51%token _class                    \]
// `-` on its own; the `range` rule skips it between two literals.
52%token  range                    \-
53
54// Internal options.
// Matches `(?`, an optional `-` or `+`, exactly one flag among `i`, `m`, `s`,
// `x`, then `)` — e.g. `(?i)` or `(?-s)`. Multi-flag forms such as `(?im-sx)`
// are not matched by this token.
55%token  internal_option          \(\?[\-+]?[imsx]\)
56
57// Lookahead and lookbehind assertions.
// Opening delimiters only: the `assertion` rule closes each of them with the
// shared `_capturing` (`)`) token. The two lookbehind openers start with `(?<`,
// which is also the whole pattern of `named_capturing_`, so they are declared
// ahead of it.
58%token  lookahead_               \(\?=
59%token  negative_lookahead_      \(\?!
60%token  lookbehind_              \(\?<=
61%token  negative_lookbehind_     \(\?<!
62
63// Conditions.
// `-> ns` switches the lexer to namespace `ns` after the token is matched, and
// `ns:name` declares a token that only exists inside namespace `ns`.
//
// * `named_reference_` (`(?(<`) enters `nc`, where `capturing_name` and
//   `_named_capturing` are declared.
// * `absolute_reference_` and `relative_reference_` match `(?(` only when a
//   digit, respectively a sign, follows. The lookahead does not consume it, so
//   the number is lexed as `index` inside `c`, which then returns to `default`.
// * `assertion_reference_` is the bare `(?(`. It is a prefix of the three
//   patterns above and therefore has to be declared after them.
64%token  named_reference_         \(\?\(<            -> nc
65%token  absolute_reference_      \(\?\((?=\d)       -> c
66%token  relative_reference_      \(\?\((?=[\+\-])   -> c
67%token  c:index                  [\+\-]?\d+         -> default
68%token  assertion_reference_     \(\?\(
69
70// Comments.
// `comment_` (`(?#`) enters the `co` namespace. Inside it, `_comment` (`)`)
// switches back to `default`, and `comment` lazily takes everything up to, but
// not including, the first `)` that is not preceded by a backslash. `_comment`
// is declared first, so an empty comment `(?#)` yields no `comment` token,
// which is why the `capturing` rule marks `<comment>` as optional.
71%token  comment_                 \(\?#              -> co
72%token  co:_comment              \)                 -> default
73%token  co:comment               .*?(?=(?<!\\)\))
74
75// Capturing group.
// Group openers. Every `(?…` opener is declared before the plain `(` of
// `capturing_`, which is a prefix of all of them.
//
// `named_capturing_` enters the `nc` namespace: `capturing_name` is at least
// one character, taken lazily up to the first `>` not preceded by a backslash,
// and `_named_capturing` (`>`) returns to `default`.
76%token  named_capturing_         \(\?<              -> nc
77%token  nc:_named_capturing      >                  -> default
78%token  nc:capturing_name        .+?(?=(?<!\\)>)
79%token  non_capturing_           \(\?:
80%token  non_capturing_reset_     \(\?\|
81%token  atomic_group_            \(\?>
82%token  capturing_               \(
// `_capturing` is the single closing `)` used by the `capturing`, `assertion`
// and `condition` rules.
83%token _capturing                \)
84
85// Quantifiers (by default, greedy).
// For each family the possessive (`+` suffix) and lazy (`?` suffix) forms are
// declared before the bare greedy form, because the greedy pattern is a prefix
// of the other two.
86%token  zero_or_one_possessive   \?\+
87%token  zero_or_one_lazy         \?\?
88%token  zero_or_one              \?
89%token  zero_or_more_possessive  \*\+
90%token  zero_or_more_lazy        \*\?
91%token  zero_or_more             \*
92%token  one_or_more_possessive   \+\+
93%token  one_or_more_lazy         \+\?
94%token  one_or_more              \+
// `{n}` is the only family declared without possessive or lazy variants.
95%token  exactly_n                \{[0-9]+\}
// `{n,m}`: both bounds are required here; `{n,}` is handled by `n_or_more*`.
96%token  n_to_m_possessive        \{[0-9]+,[0-9]+\}\+
97%token  n_to_m_lazy              \{[0-9]+,[0-9]+\}\?
98%token  n_to_m                   \{[0-9]+,[0-9]+\}
99%token  n_or_more_possessive     \{[0-9]+,\}\+
100%token  n_or_more_lazy           \{[0-9]+,\}\?
101%token  n_or_more                \{[0-9]+,\}
102
103// Alternation.
// `|`. Besides the `alternation` rule, the `condition` rule skips this token
// between the two branches of a conditional group.
104%token alternation               \|
105
106// Literal.
// Escapes that denote one fixed character: \a \e \f \n \r \t, and \c followed
// by any ASCII character.
107%token character                 \\([aefnrt]|c[\x00-\x7f])
// Escapes that encode a code point: three octal digits, \xhh or \x{h…}.
108%token dynamic_character         \\([0-7]{3}|x[0-9a-zA-Z]{2}|x{[0-9a-zA-Z]+})
109// Please see PCRESYNTAX(3), General Category properties, PCRE special category
110// properties and script names for \p{} and \P{}.
111%token character_type            \\([CdDhHNRsSvVwWX]|[pP]{[^}]+})
// Anchors: one of the escapes \b \B \A \Z \z \G, or a bare `^` or `$`. The
// letters must form a character class: written as a group, `(bBAZzG)` only
// matches the literal seven-character text `\bBAZzG`, so every real escaped
// anchor would fall through to the catch-all `literal` token below.
112%token anchor                    \\[bBAZzG]|\^|\$
113%token match_point_reset         \\K
// Catch-all: any escaped character, or any single character. It has to remain
// the last token of the default namespace so that every more specific token
// above gets a chance to match first.
114%token literal                   \\.|.
115
116
117// Rules.
118
// First rule of the grammar. The leading `#` makes the rule itself produce an
// AST node, `#expression`, whose content is a single alternation.
119#expression:
120    alternation()
121
// One concatenation, followed by any number of `|`-separated concatenations.
// `::alternation::` skips the `|` token (it is not kept in the tree). The
// `#alternation` tag sits inside the repeated group, so it is only reached
// when at least one `|` is present.
122alternation:
123    concatenation() ( ::alternation:: concatenation() #alternation )*
124
// A sequence of one or more items, each being an internal option, an
// assertion, a quantification or a condition, tried in that order. The
// `#concatenation` tag sits inside the repeated group, so it is only reached
// when a second item follows the first one.
125concatenation:
126    (   internal_options() | assertion() | quantification() | condition() )
127    ( ( internal_options() | assertion() | quantification() | condition() ) #concatenation )*
128
// `#internal_options` node holding one kept `internal_option` token
// (`<…>` keeps the token in the tree, unlike `::…::`).
129#internal_options:
130    <internal_option>
131
// Conditional group. The test comes in one of three forms, each retagging the
// node:
//   * named reference    -> #namedcondition     (keeps `capturing_name`)
//   * relative reference -> #relativecondition  (keeps `index`)
//   * absolute reference -> #absolutecondition  (keeps `index`)
//   * assertion          -> #assertioncondition (an alternation)
// After the test, the first `_capturing` (`)`) closes the test itself. Then
// come an optional "then" concatenation, an optional `|` with an optional
// "else" concatenation, and the `_capturing` that closes the whole group.
132#condition:
133    (
134        ::named_reference_:: <capturing_name> ::_named_capturing:: #namedcondition
135      | (
136            ::relative_reference_:: #relativecondition
137          | ::absolute_reference_:: #absolutecondition
138        )
139        <index>
140      | ::assertion_reference_:: alternation() #assertioncondition
141    )
142    ::_capturing:: concatenation()?
143    ( ::alternation:: concatenation()? )?
144    ::_capturing::
145
// Lookaround assertion. The opener is skipped and only selects the node name
// (#lookahead, #negativelookahead, #lookbehind or #negativelookbehind). The
// body is an alternation closed by `_capturing` (`)`).
146assertion:
147    (
148        ::lookahead_::           #lookahead
149      | ::negative_lookahead_::  #negativelookahead
150      | ::lookbehind_::          #lookbehind
151      | ::negative_lookbehind_:: #negativelookbehind
152    )
153    alternation() ::_capturing::
154
// A class or a simple item, optionally followed by exactly one quantifier.
// The `#quantification` node is only reached when a quantifier is present;
// an unquantified item is passed through without a wrapper node.
155quantification:
156    ( class() | simple() ) ( quantifier() #quantification )?
157
// Any single quantifier token. Every token is kept (`<…>`), so its name and
// text are available in the tree. The rule has no `#` tag of its own.
158quantifier:
159    <zero_or_one_possessive>  | <zero_or_one_lazy>  | <zero_or_one>
160  | <zero_or_more_possessive> | <zero_or_more_lazy> | <zero_or_more>
161  | <one_or_more_possessive>  | <one_or_more_lazy>  | <one_or_more>
162  | <exactly_n>
163  | <n_to_m_possessive>       | <n_to_m_lazy>       | <n_to_m>
164  | <n_or_more_possessive>    | <n_or_more_lazy>    | <n_or_more>
165
// Character class. `[^` retags the node as #negativeclass; a plain `[` keeps
// the default #class. The body is one or more members, each tried as a range
// first and as a single literal otherwise, up to the closing `]`.
//
// NOTE(review): `class_` is not accepted as a member. If the lexer emits
// `class_` for a `[` appearing inside a class (it is declared before the
// catch-all `literal` token), a pattern such as `[a[b]` would be rejected —
// confirm against the lexer and add `<class_>` here if that is not intended.
166#class:
167    (
168        ::negative_class_:: #negativeclass
169      | ::class_::
170    )
171    ( range() | literal() )+
172    ::_class::
173
// `#range` node made of two literals; the `-` between them is skipped, so the
// node only contains the lower and upper bounds.
174#range:
175    literal() ::range:: literal()
176
// An item that is not a class: a group or comment (`capturing`), tried first,
// or a single literal.
177simple:
178    capturing()
179  | literal()
180
// Either a comment or a group.
//   * Comment: `(?#`, an optional kept `comment` token, `)` -> #comment.
//   * Group: the skipped opener selects the node name (#namedcapturing, which
//     also keeps `capturing_name`, #noncapturing, #noncapturingreset or
//     #atomicgroup). The plain `(` alternative carries no tag of its own. The
//     body is an alternation closed by `_capturing` (`)`).
181capturing:
182    ::comment_:: <comment>? ::_comment:: #comment
183  | (
184        ::named_capturing_:: <capturing_name> ::_named_capturing:: #namedcapturing
185      | ::non_capturing_:: #noncapturing
186      | ::non_capturing_reset_:: #noncapturingreset
187      | ::atomic_group_:: #atomicgroup
188      | ::capturing_::
189    )
190    alternation() ::_capturing::
191
// One kept token among the literal-like tokens, tried in the listed order.
// Anchors and `\K` are accepted here too, so they may appear anywhere a
// literal may, including inside a class or as a range bound.
//
// NOTE(review): `range` (`-`), `class_` (`[`) and `_class` (`]`) are not
// alternatives of this rule. Presumably a bare `-` outside a class (e.g.
// `a-b`) therefore cannot be parsed — confirm against the lexer.
192literal:
193    <character>
194  | <dynamic_character>
195  | <character_type>
196  | <anchor>
197  | <match_point_reset>
198  | <literal>
199