xref: /plugin/combo/vendor/antlr/antlr4-php-runtime/src/Atn/PredictionMode.php (revision 37748cd8654635afbeca80942126742f0f4cc346)
1*37748cd8SNickeau<?php
2*37748cd8SNickeau
3*37748cd8SNickeaudeclare(strict_types=1);
4*37748cd8SNickeau
5*37748cd8SNickeaunamespace Antlr\Antlr4\Runtime\Atn;
6*37748cd8SNickeau
7*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\SemanticContexts\SemanticContext;
8*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\RuleStopState;
9*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Equality;
10*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Equivalence;
11*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Hashable;
12*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Hasher;
13*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\BitSet;
14*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\Map;
15*37748cd8SNickeau
16*37748cd8SNickeau/**
17*37748cd8SNickeau * This enumeration defines the prediction modes available in ANTLR 4 along with
18*37748cd8SNickeau * utility methods for analyzing configuration sets for conflicts and/or
19*37748cd8SNickeau * ambiguities.
20*37748cd8SNickeau */
21*37748cd8SNickeaufinal class PredictionMode
22*37748cd8SNickeau{
23*37748cd8SNickeau    /**
24*37748cd8SNickeau     * The SLL(*) prediction mode. This prediction mode ignores the current
25*37748cd8SNickeau     * parser context when making predictions. This is the fastest prediction
26*37748cd8SNickeau     * mode, and provides correct results for many grammars. This prediction
27*37748cd8SNickeau     * mode is more powerful than the prediction mode provided by ANTLR 3, but
28*37748cd8SNickeau     * may result in syntax errors for grammar and input combinations which are
29*37748cd8SNickeau     * not SLL.
30*37748cd8SNickeau     *
31*37748cd8SNickeau     * When using this prediction mode, the parser will either return a correct
32*37748cd8SNickeau     * parse tree (i.e. the same parse tree that would be returned with the
33*37748cd8SNickeau     * {@see PredictionMode::LL()} prediction mode), or it will report a syntax
34*37748cd8SNickeau     * error. If a syntax error is encountered when using the
35*37748cd8SNickeau     * {@see PredictionMode::SLL()} prediction mode, it may be due to either
36*37748cd8SNickeau     * an actual syntax error in the input or indicate that the particular
37*37748cd8SNickeau     * ombination of grammar and input requires the more powerful
38*37748cd8SNickeau     * {@see PredictionMode::LL()} prediction abilities to complete successfully.
39*37748cd8SNickeau     *
40*37748cd8SNickeau     * This prediction mode does not provide any guarantees for prediction
41*37748cd8SNickeau     * behavior for syntactically-incorrect inputs.
42*37748cd8SNickeau     */
43*37748cd8SNickeau    public const SLL = 0;
44*37748cd8SNickeau
45*37748cd8SNickeau    /**
46*37748cd8SNickeau     * The LL(*) prediction mode. This prediction mode allows the current parser
47*37748cd8SNickeau     * context to be used for resolving SLL conflicts that occur during
48*37748cd8SNickeau     * prediction. This is the fastest prediction mode that guarantees correct
49*37748cd8SNickeau     * parse results for all combinations of grammars with syntactically correct
50*37748cd8SNickeau     * inputs.
51*37748cd8SNickeau     *
52*37748cd8SNickeau     * When using this prediction mode, the parser will make correct decisions
53*37748cd8SNickeau     * for all syntactically-correct grammar and input combinations. However, in
54*37748cd8SNickeau     * cases where the grammar is truly ambiguous this prediction mode might not
55*37748cd8SNickeau     * report a precise answer for exactly which alternatives are ambiguous.
56*37748cd8SNickeau     *
57*37748cd8SNickeau     * This prediction mode does not provide any guarantees for prediction
58*37748cd8SNickeau     * behavior for syntactically-incorrect inputs.
59*37748cd8SNickeau     */
60*37748cd8SNickeau    public const LL = 1;
61*37748cd8SNickeau
62*37748cd8SNickeau    /**
63*37748cd8SNickeau     * The LL(*) prediction mode with exact ambiguity detection. In addition to
64*37748cd8SNickeau     * the correctness guarantees provided by the {@see PredictionMode::LL}
65*37748cd8SNickeau     * prediction mode, this prediction mode instructs the prediction algorithm
66*37748cd8SNickeau     * to determine the complete and exact set of ambiguous alternatives for
67*37748cd8SNickeau     * every ambiguous decision encountered while parsing.
68*37748cd8SNickeau     *
69*37748cd8SNickeau     * This prediction mode may be used for diagnosing ambiguities during
70*37748cd8SNickeau     * grammar development. Due to the performance overhead of calculating sets
71*37748cd8SNickeau     * of ambiguous alternatives, this prediction mode should be avoided when
72*37748cd8SNickeau     * the exact results are not necessary.
73*37748cd8SNickeau     *
74*37748cd8SNickeau     * This prediction mode does not provide any guarantees for prediction
75*37748cd8SNickeau     * behavior for syntactically-incorrect inputs.
76*37748cd8SNickeau     */
77*37748cd8SNickeau    public const LL_EXACT_AMBIG_DETECTION = 2;
78*37748cd8SNickeau
79*37748cd8SNickeau    /**
80*37748cd8SNickeau     * Computes the SLL prediction termination condition.
81*37748cd8SNickeau     *
82*37748cd8SNickeau     * This method computes the SLL prediction termination condition for both of
83*37748cd8SNickeau     * the following cases.
84*37748cd8SNickeau     *
85*37748cd8SNickeau     * - The usual SLL+LL fallback upon SLL conflict
86*37748cd8SNickeau     * - Pure SLL without LL fallback
87*37748cd8SNickeau     *
88*37748cd8SNickeau     * COMBINED SLL+LL PARSING
89*37748cd8SNickeau     *
90*37748cd8SNickeau     * When LL-fallback is enabled upon SLL conflict, correct predictions are
91*37748cd8SNickeau     * ensured regardless of how the termination condition is computed by this
92*37748cd8SNickeau     * method. Due to the substantially higher cost of LL prediction, the
93*37748cd8SNickeau     * prediction should only fall back to LL when the additional lookahead
94*37748cd8SNickeau     * cannot lead to a unique SLL prediction.
95*37748cd8SNickeau     *
96*37748cd8SNickeau     * Assuming combined SLL+LL parsing, an SLL configuration set with only
97*37748cd8SNickeau     * conflicting subsets should fall back to full LL, even if the
98*37748cd8SNickeau     * configuration sets don't resolve to the same alternative (e.g.
99*37748cd8SNickeau     * `{1,2}` and `{3,4}`. If there is at least one non-conflicting
100*37748cd8SNickeau     * configuration, SLL could continue with the hopes that more lookahead will
101*37748cd8SNickeau     * resolve via one of those non-conflicting configurations.
102*37748cd8SNickeau     *
103*37748cd8SNickeau     * Here's the prediction termination rule them: SLL (for SLL+LL parsing)
104*37748cd8SNickeau     * stops when it sees only conflicting configuration subsets. In contrast,
105*37748cd8SNickeau     * full LL keeps going when there is uncertainty.
106*37748cd8SNickeau     *
107*37748cd8SNickeau     * HEURISTIC
108*37748cd8SNickeau     *
109*37748cd8SNickeau     * As a heuristic, we stop prediction when we see any conflicting subset
110*37748cd8SNickeau     * unless we see a state that only has one alternative associated with it.
111*37748cd8SNickeau     * The single-alt-state thing lets prediction continue upon rules like
112*37748cd8SNickeau     * (otherwise, it would admit defeat too soon):
113*37748cd8SNickeau     *
114*37748cd8SNickeau     * `[12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;`
115*37748cd8SNickeau     *
116*37748cd8SNickeau     * When the ATN simulation reaches the state before `';'`, it has a
117*37748cd8SNickeau     * DFA state that looks like: `[12|1|[], 6|2|[], 12|2|[]]`. Naturally
118*37748cd8SNickeau     * `12|1|[]` and `12|2|[]` conflict, but we cannot stop processing this
119*37748cd8SNickeau     * node because alternative to has another way to continue, via `[6|2|[]]`.
120*37748cd8SNickeau     *
121*37748cd8SNickeau     * It also let's us continue for this rule: `[1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;`
122*37748cd8SNickeau     *
123*37748cd8SNickeau     * After matching input A, we reach the stop state for rule A, state 1.
124*37748cd8SNickeau     * State 8 is the state right before B. Clearly alternatives 1 and 2
125*37748cd8SNickeau     * conflict and no amount of further lookahead will separate the two.
126*37748cd8SNickeau     * However, alternative 3 will be able to continue and so we do not stop
127*37748cd8SNickeau     * working on this state. In the previous example, we're concerned with
128*37748cd8SNickeau     * states associated with the conflicting alternatives. Here alt 3 is not
129*37748cd8SNickeau     * associated with the conflicting configs, but since we can continue
130*37748cd8SNickeau     * looking for input reasonably, don't declare the state done.
131*37748cd8SNickeau     *
132*37748cd8SNickeau     * PURE SLL PARSING
133*37748cd8SNickeau     *
134*37748cd8SNickeau     * To handle pure SLL parsing, all we have to do is make sure that we
135*37748cd8SNickeau     * combine stack contexts for configurations that differ only by semantic
136*37748cd8SNickeau     * predicate. From there, we can do the usual SLL termination heuristic.
137*37748cd8SNickeau     *
138*37748cd8SNickeau     * PREDICATES IN SLL+LL PARSING
139*37748cd8SNickeau     *
140*37748cd8SNickeau     * SLL decisions don't evaluate predicates until after they reach DFA stop
141*37748cd8SNickeau     * states because they need to create the DFA cache that works in all
142*37748cd8SNickeau     * semantic situations. In contrast, full LL evaluates predicates collected
143*37748cd8SNickeau     * during start state computation so it can ignore predicates thereafter.
144*37748cd8SNickeau     * This means that SLL termination detection can totally ignore semantic
145*37748cd8SNickeau     * predicates.
146*37748cd8SNickeau     *
147*37748cd8SNickeau     * Implementation-wise, {@see ATNConfigSet} combines stack contexts but not
148*37748cd8SNickeau     * semantic predicate contexts so we might see two configurations like the
149*37748cd8SNickeau     * following.
150*37748cd8SNickeau     *
151*37748cd8SNickeau     * `s, 1, x, {}), (s, 1, x', {p})`
152*37748cd8SNickeau     *
153*37748cd8SNickeau     * Before testing these configurations against others, we have to merge
154*37748cd8SNickeau     * `x` and `x'` (without modifying the existing configurations).
155*37748cd8SNickeau     * For example, we test `(x+x') === x''` when looking for conflicts in
156*37748cd8SNickeau     * the following configurations.
157*37748cd8SNickeau     *
158*37748cd8SNickeau     * `(s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})`
159*37748cd8SNickeau     *
160*37748cd8SNickeau     * If the configuration set has predicates (as indicated by
161*37748cd8SNickeau     * {@see ATNConfigSet::hasSemanticContext()}), this algorithm makes a copy of
162*37748cd8SNickeau     * the configurations to strip out all of the predicates so that a standard
163*37748cd8SNickeau     * {@see ATNConfigSet} will merge everything ignoring predicates.
164*37748cd8SNickeau     */
165*37748cd8SNickeau    public static function hasSLLConflictTerminatingPrediction(int $mode, ATNConfigSet $configs) : bool
166*37748cd8SNickeau    {
167*37748cd8SNickeau        /* Configs in rule stop states indicate reaching the end of the decision
168*37748cd8SNickeau         * rule (local context) or end of start rule (full context). If all
169*37748cd8SNickeau         * configs meet this condition, then none of the configurations is able
170*37748cd8SNickeau         * to match additional input so we terminate prediction.
171*37748cd8SNickeau         */
172*37748cd8SNickeau        if (self::allConfigsInRuleStopStates($configs)) {
173*37748cd8SNickeau            return true;
174*37748cd8SNickeau        }
175*37748cd8SNickeau
176*37748cd8SNickeau        // pure SLL mode parsing
177*37748cd8SNickeau        if ($mode === self::SLL) {
178*37748cd8SNickeau            // Don't bother with combining configs from different semantic
179*37748cd8SNickeau            // contexts if we can fail over to full LL; costs more time
180*37748cd8SNickeau            // since we'll often fail over anyway.
181*37748cd8SNickeau            if ($configs->hasSemanticContext) {
182*37748cd8SNickeau                // dup configs, tossing out semantic predicates
183*37748cd8SNickeau                $dup = new ATNConfigSet();
184*37748cd8SNickeau
185*37748cd8SNickeau                foreach ($configs->elements() as $c) {
186*37748cd8SNickeau                    $c = new ATNConfig($c, null, null, SemanticContext::none());
187*37748cd8SNickeau                    $dup->add($c);
188*37748cd8SNickeau                }
189*37748cd8SNickeau
190*37748cd8SNickeau                $configs = $dup;
191*37748cd8SNickeau            }
192*37748cd8SNickeau            // now we have combined contexts for configs with dissimilar preds
193*37748cd8SNickeau        }
194*37748cd8SNickeau
195*37748cd8SNickeau        // pure SLL or combined SLL+LL mode parsing
196*37748cd8SNickeau        $altsets = self::getConflictingAltSubsets($configs);
197*37748cd8SNickeau
198*37748cd8SNickeau        return self::hasConflictingAltSet($altsets) && !self::hasStateAssociatedWithOneAlt($configs);
199*37748cd8SNickeau    }
200*37748cd8SNickeau
201*37748cd8SNickeau    /**
202*37748cd8SNickeau     * Checks if any configuration in `configs` is in a {@see RuleStopState}.
203*37748cd8SNickeau     * Configurations meeting this condition have reached the end of the decision
204*37748cd8SNickeau     * rule (local context) or end of start rule (full context).
205*37748cd8SNickeau     *
206*37748cd8SNickeau     * @param ATNConfigSet $configs The configuration set to test.
207*37748cd8SNickeau     *
208*37748cd8SNickeau     * @return bool If any configuration in  is in a if any configuration in
209*37748cd8SNickeau     *              `configs` is in a {@see RuleStopState}, otherwise `false`.
210*37748cd8SNickeau     */
211*37748cd8SNickeau    public static function hasConfigInRuleStopState(ATNConfigSet $configs) : bool
212*37748cd8SNickeau    {
213*37748cd8SNickeau        foreach ($configs->elements() as $c) {
214*37748cd8SNickeau            if ($c->state instanceof RuleStopState) {
215*37748cd8SNickeau                return true;
216*37748cd8SNickeau            }
217*37748cd8SNickeau        }
218*37748cd8SNickeau
219*37748cd8SNickeau        return false;
220*37748cd8SNickeau    }
221*37748cd8SNickeau
222*37748cd8SNickeau    /**
223*37748cd8SNickeau     * Checks if all configurations in `configs` are in a {@see RuleStopState}.
224*37748cd8SNickeau     * Configurations meeting this condition have reached the end of the decision
225*37748cd8SNickeau     * rule (local context) or end of start rule (full context).
226*37748cd8SNickeau     *
227*37748cd8SNickeau     * @param ATNConfigSet $configs the configuration set to test.
228*37748cd8SNickeau     *
229*37748cd8SNickeau     * @return bool If all configurations in  are in a if all configurations in
230*37748cd8SNickeau     *              `configs` are in a {@see RuleStopState}, otherwise `false`.
231*37748cd8SNickeau     */
232*37748cd8SNickeau    public static function allConfigsInRuleStopStates(ATNConfigSet $configs) : bool
233*37748cd8SNickeau    {
234*37748cd8SNickeau        foreach ($configs->elements() as $c) {
235*37748cd8SNickeau            if (!$c->state instanceof RuleStopState) {
236*37748cd8SNickeau                return false;
237*37748cd8SNickeau            }
238*37748cd8SNickeau        }
239*37748cd8SNickeau
240*37748cd8SNickeau        return true;
241*37748cd8SNickeau    }
242*37748cd8SNickeau
243*37748cd8SNickeau    /**
244*37748cd8SNickeau     * Full LL prediction termination.
245*37748cd8SNickeau     *
246*37748cd8SNickeau     * Can we stop looking ahead during ATN simulation or is there some
247*37748cd8SNickeau     * uncertainty as to which alternative we will ultimately pick, after
248*37748cd8SNickeau     * consuming more input? Even if there are partial conflicts, we might know
249*37748cd8SNickeau     * that everything is going to resolve to the same minimum alternative. That
250*37748cd8SNickeau     * means we can stop since no more lookahead will change that fact. On the
251*37748cd8SNickeau     * other hand, there might be multiple conflicts that resolve to different
252*37748cd8SNickeau     * minimums. That means we need more look ahead to decide which of those
253*37748cd8SNickeau     * alternatives we should predict.
254*37748cd8SNickeau     *
255*37748cd8SNickeau     * The basic idea is to split the set of configurations `C`, into
256*37748cd8SNickeau     * conflicting subsets `(s, _, ctx, _)` and singleton subsets with
257*37748cd8SNickeau     * non-conflicting configurations. Two configurations conflict if they have
258*37748cd8SNickeau     * identical {@see ATNConfig::state()} and {@see ATNConfig::context()} values
259*37748cd8SNickeau     * but different {@see ATNConfig::alt()} value, e.g. `(s, i, ctx, _)` and
260*37748cd8SNickeau     * `(s, j, ctx, _)` for `i!=j`.
261*37748cd8SNickeau     *
262*37748cd8SNickeau     * Reduce these configuration subsets to the set of possible alternatives.
263*37748cd8SNickeau     * You can compute the alternative subsets in one pass as follows:
264*37748cd8SNickeau     *
265*37748cd8SNickeau     * `A_s,ctx = {i | (s, i, ctx, _)}` for each configuration in `C` holding
266*37748cd8SNickeau     * `s` and `ctx` fixed.
267*37748cd8SNickeau     *
268*37748cd8SNickeau     * Or in pseudo-code, for each configuration `c` in `C`:
269*37748cd8SNickeau     *
270*37748cd8SNickeau     *     map[c] U= c.{@see ATNConfig::alt alt} # map hash/equals uses s and x,
271*37748cd8SNickeau     *     not alt and not pred
272*37748cd8SNickeau     *
273*37748cd8SNickeau     * The values in `map` are the set of `A_s,ctx` sets.
274*37748cd8SNickeau     *
275*37748cd8SNickeau     * If `|A_s,ctx|=1` then there is no conflict associated with `s` and `ctx`.
276*37748cd8SNickeau     *
277*37748cd8SNickeau     * Reduce the subsets to singletons by choosing a minimum of each subset. If
278*37748cd8SNickeau     * the union of these alternative subsets is a singleton, then no amount of
279*37748cd8SNickeau     * more lookahead will help us. We will always pick that alternative. If,
280*37748cd8SNickeau     * however, there is more than one alternative, then we are uncertain which
281*37748cd8SNickeau     * alternative to predict and must continue looking for resolution. We may
282*37748cd8SNickeau     * or may not discover an ambiguity in the future, even if there are no
283*37748cd8SNickeau     * conflicting subsets this round.
284*37748cd8SNickeau     *
285*37748cd8SNickeau     * The biggest sin is to terminate early because it means we've made a
286*37748cd8SNickeau     * decision but were uncertain as to the eventual outcome. We haven't used
287*37748cd8SNickeau     * enough lookahead. On the other hand, announcing a conflict too late is no
288*37748cd8SNickeau     * big deal; you will still have the conflict. It's just inefficient. It
289*37748cd8SNickeau     * might even look until the end of file.
290*37748cd8SNickeau     *
291*37748cd8SNickeau     * No special consideration for semantic predicates is required because
292*37748cd8SNickeau     * predicates are evaluated on-the-fly for full LL prediction, ensuring that
293*37748cd8SNickeau     * no configuration contains a semantic context during the termination
294*37748cd8SNickeau     * check.
295*37748cd8SNickeau     *
296*37748cd8SNickeau     * CONFLICTING CONFIGS
297*37748cd8SNickeau     *
298*37748cd8SNickeau     * Two configurations `(s, i, x)` and `(s, j, x')`, conflict when `i!=j`
299*37748cd8SNickeau     * but `x=x'`. Because we merge all `(s, i, _)` configurations together,
300*37748cd8SNickeau     * that means that there are at most `n` configurations associated with
301*37748cd8SNickeau     * state `s` for `n` possible alternatives in the decision. The merged stacks
302*37748cd8SNickeau     * complicate the comparison of configuration contexts `x` and `x'`.
303*37748cd8SNickeau     * Sam checks to see if one is a subset of the other by calling merge and
304*37748cd8SNickeau     * checking to see if the merged result is either `x` orv`x'`. If the `x`
305*37748cd8SNickeau     * associated with lowest alternative `i`vis the superset, then `i` is the
306*37748cd8SNickeau     * only possible prediction since the others resolve to `min(i)` as well.
307*37748cd8SNickeau     * However, if `x` is associated with `j>i` then at least one stack
308*37748cd8SNickeau     * configuration for `j` is not in conflict with alternative `i`. The algorithm
309*37748cd8SNickeau     * should keep going, looking for more lookahead due to the uncertainty.
310*37748cd8SNickeau     *
311*37748cd8SNickeau     * For simplicity, I'm doing a equality check between `x` and `x'` that lets
312*37748cd8SNickeau     * the algorithm continue to consume lookahead longer than necessary. The
313*37748cd8SNickeau     * reason I like the equality is of course the simplicity but also because
314*37748cd8SNickeau     * that is the test you need to detect the alternatives that are actually
315*37748cd8SNickeau     * in conflict.
316*37748cd8SNickeau     *
317*37748cd8SNickeau     * CONTINUE/STOP RULE
318*37748cd8SNickeau     *
319*37748cd8SNickeau     * Continue if union of resolved alternative sets from non-conflicting and
320*37748cd8SNickeau     * conflicting alternative subsets has more than one alternative. We are
321*37748cd8SNickeau     * uncertain about which alternative to predict.
322*37748cd8SNickeau     *
323*37748cd8SNickeau     * The complete set of alternatives, `[i for (_,i,_)]`, tells us which
324*37748cd8SNickeau     * alternatives are still in the running for the amount of input we've
325*37748cd8SNickeau     * consumed at this point. The conflicting sets let us to strip away
326*37748cd8SNickeau     * configurations that won't lead to more states because we resolve
327*37748cd8SNickeau     * conflicts to the configuration with a minimum alternate for the
328*37748cd8SNickeau     * conflicting set.
329*37748cd8SNickeau     *
330*37748cd8SNickeau     * CASES
331*37748cd8SNickeau     *
332*37748cd8SNickeau     * - no conflicts and more than 1 alternative in set => continue
333*37748cd8SNickeau     * - `(s, 1, x)}, `(s, 2, x)`, `(s, 3, z)`, `(s', 1, y)`, `(s', 2, y)`
334*37748cd8SNickeau     *   yields non-conflicting set `{3}} U conflicting sets `min({1,2})} U
335*37748cd8SNickeau     *   `min({1,2`)` = `{1,3}` => continue
336*37748cd8SNickeau     * - `(s, 1, x)}, `(s, 2, x)`, `(s', 1, y)`, `(s', 2, y)`, `(s'', 1, z)`
337*37748cd8SNickeau     *    yields non-conflicting set `{1}} U conflicting sets `min({1,2})} U
338*37748cd8SNickeau     *    `min({1,2`)` = `{1`` => stop and predict 1
339*37748cd8SNickeau     * - `(s, 1, x)}, `(s, 2, x)`, `(s', 1, y)`, `(s', 2, y)` yields conflicting,
340*37748cd8SNickeau     *    reduced sets `{1`` U `{1}} = `{1`` => stop and predict 1, can announce
341*37748cd8SNickeau     *    ambiguity `{1,2}`
342*37748cd8SNickeau     * - `(s, 1, x)}, `(s, 2, x)`, `(s', 2, y)`, `(s', 3, y)` yields conflicting,
343*37748cd8SNickeau     *    reduced sets `{1`` U `{2}} = `{1,2`` => continue
344*37748cd8SNickeau     * - `(s, 1, x)}, `(s, 2, x)`, `(s', 3, y)`, `(s', 4, y)` yields conflicting,
345*37748cd8SNickeau     *    reduced sets `{1`` U `{3}} = `{1,3`` => continue
346*37748cd8SNickeau     *
347*37748cd8SNickeau     * EXACT AMBIGUITY DETECTION
348*37748cd8SNickeau     *
349*37748cd8SNickeau     * If all states report the same conflicting set of alternatives, then we
350*37748cd8SNickeau     * know we have the exact ambiguity set.
351*37748cd8SNickeau     *
352*37748cd8SNickeau     * `|A_i|>1` and `A_i = A_j` for all i, j.
353*37748cd8SNickeau     *
354*37748cd8SNickeau     * In other words, we continue examining lookahead until all `A_i`
355*37748cd8SNickeau     * have more than one alternative and all `A_i` are the same. If
356*37748cd8SNickeau     * `A={{1,2}, {1,3}}`, then regular LL prediction would terminate
357*37748cd8SNickeau     * because the resolved set is `{1}`. To determine what the real
358*37748cd8SNickeau     * ambiguity is, we have to know whether the ambiguity is between one and
359*37748cd8SNickeau     * two or one and three so we keep going. We can only stop prediction when
360*37748cd8SNickeau     * we need exact ambiguity detection when the sets look like
361*37748cd8SNickeau     * `A={{1,2}}} or `{{1,2},{1,2}``, etc...
362*37748cd8SNickeau     *
363*37748cd8SNickeau     * @param array<BitSet> $altsets
364*37748cd8SNickeau     */
365*37748cd8SNickeau    public static function resolvesToJustOneViableAlt(array $altsets) : int
366*37748cd8SNickeau    {
367*37748cd8SNickeau        return self::getSingleViableAlt($altsets);
368*37748cd8SNickeau    }
369*37748cd8SNickeau
370*37748cd8SNickeau    /**
371*37748cd8SNickeau     * Determines if every alternative subset in `altsets` contains more
372*37748cd8SNickeau     * than one alternative.
373*37748cd8SNickeau     *
374*37748cd8SNickeau     * @param array<BitSet> $altsets a collection of alternative subsets
375*37748cd8SNickeau     *
376*37748cd8SNickeau     * @return bool If every >BitSet in `altsets` {@see BitSet::length()} > 1,
377*37748cd8SNickeau     *              otherwise `false`.
378*37748cd8SNickeau     */
379*37748cd8SNickeau    public static function allSubsetsConflict(array $altsets) : bool
380*37748cd8SNickeau    {
381*37748cd8SNickeau        return !self::hasNonConflictingAltSet($altsets);
382*37748cd8SNickeau    }
383*37748cd8SNickeau
384*37748cd8SNickeau    /**
385*37748cd8SNickeau     * Determines if any single alternative subset in `altsets` contains
386*37748cd8SNickeau     * exactly one alternative.
387*37748cd8SNickeau     *
388*37748cd8SNickeau     * @param array<BitSet> $altsets a collection of alternative subsets
389*37748cd8SNickeau     *
390*37748cd8SNickeau     * @return bool `true` if `altsets` contains a {@see BitSet} with
391*37748cd8SNickeau     *              {@see BitSet::length()} 1, otherwise `false`.
392*37748cd8SNickeau     */
393*37748cd8SNickeau    public static function hasNonConflictingAltSet(array $altsets) : bool
394*37748cd8SNickeau    {
395*37748cd8SNickeau        foreach ($altsets as $alts) {
396*37748cd8SNickeau            if ($alts->length() === 1) {
397*37748cd8SNickeau                return true;
398*37748cd8SNickeau            }
399*37748cd8SNickeau        }
400*37748cd8SNickeau
401*37748cd8SNickeau        return false;
402*37748cd8SNickeau    }
403*37748cd8SNickeau
404*37748cd8SNickeau    /**
405*37748cd8SNickeau     * Determines if any single alternative subset in `altsets` contains
406*37748cd8SNickeau     * more than one alternative.
407*37748cd8SNickeau     *
408*37748cd8SNickeau     * @param array<BitSet> $altsets a collection of alternative subsets
409*37748cd8SNickeau     *
410*37748cd8SNickeau     * @return bool `true` if `altsets` contains a {@see BitSet} with
411*37748cd8SNickeau     *              {@see BitSet::length()} > 1, otherwise `false`.
412*37748cd8SNickeau     */
413*37748cd8SNickeau    public static function hasConflictingAltSet(array $altsets) : bool
414*37748cd8SNickeau    {
415*37748cd8SNickeau        foreach ($altsets as $alts) {
416*37748cd8SNickeau            if ($alts->length() > 1) {
417*37748cd8SNickeau                return true;
418*37748cd8SNickeau            }
419*37748cd8SNickeau        }
420*37748cd8SNickeau
421*37748cd8SNickeau        return false;
422*37748cd8SNickeau    }
423*37748cd8SNickeau
424*37748cd8SNickeau    /**
425*37748cd8SNickeau     * Determines if every alternative subset in `altsets` is equivalent.
426*37748cd8SNickeau     *
427*37748cd8SNickeau     * @param array<BitSet> $altsets a collection of alternative subsets
428*37748cd8SNickeau     *
429*37748cd8SNickeau     * @return bool `true` if every member of `altsets` is equal to
430*37748cd8SNickeau     *              the others, otherwise `false`.
431*37748cd8SNickeau     */
432*37748cd8SNickeau    public static function allSubsetsEqual(array $altsets) : bool
433*37748cd8SNickeau    {
434*37748cd8SNickeau        $first = null;
435*37748cd8SNickeau
436*37748cd8SNickeau        foreach ($altsets as $alts) {
437*37748cd8SNickeau            if ($first === null) {
438*37748cd8SNickeau                $first = $alts;
439*37748cd8SNickeau            } elseif ($alts !== $first) {
440*37748cd8SNickeau                return false;
441*37748cd8SNickeau            }
442*37748cd8SNickeau        }
443*37748cd8SNickeau
444*37748cd8SNickeau        return true;
445*37748cd8SNickeau    }
446*37748cd8SNickeau
447*37748cd8SNickeau    /**
448*37748cd8SNickeau     * Returns the unique alternative predicted by all alternative subsets in
449*37748cd8SNickeau     * `altsets`. If no such alternative exists, this method returns
450*37748cd8SNickeau     * {@see ATN::INVALID_ALT_NUMBER}.
451*37748cd8SNickeau     *
452*37748cd8SNickeau     * @param array<BitSet> $altsets a collection of alternative subsets
453*37748cd8SNickeau     */
454*37748cd8SNickeau    public static function getUniqueAlt(array $altsets) : int
455*37748cd8SNickeau    {
456*37748cd8SNickeau        $all = self::getAlts($altsets);
457*37748cd8SNickeau
458*37748cd8SNickeau        if ($all->length() === 1) {
459*37748cd8SNickeau            return $all->minValue();
460*37748cd8SNickeau        }
461*37748cd8SNickeau
462*37748cd8SNickeau        return ATN::INVALID_ALT_NUMBER;
463*37748cd8SNickeau    }
464*37748cd8SNickeau
465*37748cd8SNickeau    /**
466*37748cd8SNickeau     * Gets the complete set of represented alternatives for a collection of
467*37748cd8SNickeau     * alternative subsets. This method returns the union of each {@see BitSet}
468*37748cd8SNickeau     * in `altsets`.
469*37748cd8SNickeau     *
470*37748cd8SNickeau     * @param array<BitSet> $altsets a collection of alternative subsets.
471*37748cd8SNickeau     *
472*37748cd8SNickeau     * @return BitSet the set of represented alternatives in `altsets`.
473*37748cd8SNickeau     */
474*37748cd8SNickeau    public static function getAlts(array $altsets) : BitSet
475*37748cd8SNickeau    {
476*37748cd8SNickeau        $all = new BitSet();
477*37748cd8SNickeau
478*37748cd8SNickeau        foreach ($altsets as $alts) {
479*37748cd8SNickeau            $all->or($alts);
480*37748cd8SNickeau        }
481*37748cd8SNickeau
482*37748cd8SNickeau        return $all;
483*37748cd8SNickeau    }
484*37748cd8SNickeau
485*37748cd8SNickeau    /**
486*37748cd8SNickeau     * This function gets the conflicting alt subsets from a configuration set.
487*37748cd8SNickeau     * For each configuration `c` in `configs`:
488*37748cd8SNickeau     *
489*37748cd8SNickeau     *     map[c] U= c.{@see ATNConfig::$alt} # map hash/equals uses s and x,
490*37748cd8SNickeau     *     not alt and not pred
491*37748cd8SNickeau     *
492*37748cd8SNickeau     * @return array<BitSet>
493*37748cd8SNickeau     */
494*37748cd8SNickeau    public static function getConflictingAltSubsets(ATNConfigSet $configs) : array
495*37748cd8SNickeau    {
496*37748cd8SNickeau        $configToAlts = new Map(new class implements Equivalence {
497*37748cd8SNickeau            public function equals(object $other) : bool
498*37748cd8SNickeau            {
499*37748cd8SNickeau                return $this instanceof self;
500*37748cd8SNickeau            }
501*37748cd8SNickeau
502*37748cd8SNickeau            public function equivalent(Hashable $left, Hashable $right) : bool
503*37748cd8SNickeau            {
504*37748cd8SNickeau                return $left instanceof ATNConfig
505*37748cd8SNickeau                    && $right instanceof ATNConfig
506*37748cd8SNickeau                    && $left->state->stateNumber === $right->state->stateNumber
507*37748cd8SNickeau                    && Equality::equals($left->context, $right->context);
508*37748cd8SNickeau            }
509*37748cd8SNickeau
510*37748cd8SNickeau            public function hash(Hashable $value) : int
511*37748cd8SNickeau            {
512*37748cd8SNickeau                if (!$value instanceof ATNConfig) {
513*37748cd8SNickeau                    throw new \InvalidArgumentException('Unsupported value.');
514*37748cd8SNickeau                }
515*37748cd8SNickeau
516*37748cd8SNickeau                return Hasher::hash($value->state->stateNumber, $value->context);
517*37748cd8SNickeau            }
518*37748cd8SNickeau        });
519*37748cd8SNickeau
520*37748cd8SNickeau        foreach ($configs->elements() as $cfg) {
521*37748cd8SNickeau            $alts = $configToAlts->get($cfg);
522*37748cd8SNickeau
523*37748cd8SNickeau            if ($alts === null) {
524*37748cd8SNickeau                $alts = new BitSet();
525*37748cd8SNickeau                $configToAlts->put($cfg, $alts);
526*37748cd8SNickeau            }
527*37748cd8SNickeau
528*37748cd8SNickeau            $alts->add($cfg->alt);
529*37748cd8SNickeau        }
530*37748cd8SNickeau
531*37748cd8SNickeau        return $configToAlts->getValues();
532*37748cd8SNickeau    }
533*37748cd8SNickeau
534*37748cd8SNickeau    /**
535*37748cd8SNickeau     * Get a map from state to alt subset from a configuration set. For each
536*37748cd8SNickeau     * configuration `c` in `configs`:
537*37748cd8SNickeau     *
538*37748cd8SNickeau     *     map[c.{@see ATNConfig::$state}] U= c.{@see ATNConfig::$alt}
539*37748cd8SNickeau     */
540*37748cd8SNickeau    public static function getStateToAltMap(ATNConfigSet $configs) : Map
541*37748cd8SNickeau    {
542*37748cd8SNickeau        $m = new Map();
543*37748cd8SNickeau
544*37748cd8SNickeau        foreach ($configs->elements() as $c) {
545*37748cd8SNickeau            $alts = $m->get($c->state);
546*37748cd8SNickeau
547*37748cd8SNickeau            if ($alts === null) {
548*37748cd8SNickeau                $alts = new BitSet();
549*37748cd8SNickeau                $m->put($c->state, $alts);
550*37748cd8SNickeau            }
551*37748cd8SNickeau
552*37748cd8SNickeau            $alts->add($c->alt);
553*37748cd8SNickeau        }
554*37748cd8SNickeau
555*37748cd8SNickeau        return $m;
556*37748cd8SNickeau    }
557*37748cd8SNickeau
558*37748cd8SNickeau    public static function hasStateAssociatedWithOneAlt(ATNConfigSet $configs) : bool
559*37748cd8SNickeau    {
560*37748cd8SNickeau        foreach (self::getStateToAltMap($configs)->getValues() as $value) {
561*37748cd8SNickeau            if ($value instanceof BitSet && $value->length() === 1) {
562*37748cd8SNickeau                return true;
563*37748cd8SNickeau            }
564*37748cd8SNickeau        }
565*37748cd8SNickeau
566*37748cd8SNickeau        return false;
567*37748cd8SNickeau    }
568*37748cd8SNickeau
569*37748cd8SNickeau    /**
570*37748cd8SNickeau     * @param array<BitSet> $altsets
571*37748cd8SNickeau     */
572*37748cd8SNickeau    public static function getSingleViableAlt(array $altsets) : int
573*37748cd8SNickeau    {
574*37748cd8SNickeau        $result = 0;
575*37748cd8SNickeau
576*37748cd8SNickeau        foreach ($altsets as $alts) {
577*37748cd8SNickeau            $minAlt = $alts->minValue();
578*37748cd8SNickeau
579*37748cd8SNickeau            if ($result === 0) {
580*37748cd8SNickeau                $result = (int) $minAlt;
581*37748cd8SNickeau            } elseif ($result !== $minAlt) {
582*37748cd8SNickeau                // more than 1 viable alt
583*37748cd8SNickeau                return ATN::INVALID_ALT_NUMBER;
584*37748cd8SNickeau            }
585*37748cd8SNickeau        }
586*37748cd8SNickeau
587*37748cd8SNickeau        return $result;
588*37748cd8SNickeau    }
589*37748cd8SNickeau}
590