1<?php
2
3declare(strict_types=1);
4
5namespace Antlr\Antlr4\Runtime\Atn;
6
7use Antlr\Antlr4\Runtime\Atn\SemanticContexts\SemanticContext;
8use Antlr\Antlr4\Runtime\Atn\States\RuleStopState;
9use Antlr\Antlr4\Runtime\Comparison\Equality;
10use Antlr\Antlr4\Runtime\Comparison\Equivalence;
11use Antlr\Antlr4\Runtime\Comparison\Hashable;
12use Antlr\Antlr4\Runtime\Comparison\Hasher;
13use Antlr\Antlr4\Runtime\Utils\BitSet;
14use Antlr\Antlr4\Runtime\Utils\Map;
15
16/**
17 * This enumeration defines the prediction modes available in ANTLR 4 along with
18 * utility methods for analyzing configuration sets for conflicts and/or
19 * ambiguities.
20 */
21final class PredictionMode
22{
23    /**
24     * The SLL(*) prediction mode. This prediction mode ignores the current
25     * parser context when making predictions. This is the fastest prediction
26     * mode, and provides correct results for many grammars. This prediction
27     * mode is more powerful than the prediction mode provided by ANTLR 3, but
28     * may result in syntax errors for grammar and input combinations which are
29     * not SLL.
30     *
31     * When using this prediction mode, the parser will either return a correct
32     * parse tree (i.e. the same parse tree that would be returned with the
33     * {@see PredictionMode::LL()} prediction mode), or it will report a syntax
34     * error. If a syntax error is encountered when using the
35     * {@see PredictionMode::SLL()} prediction mode, it may be due to either
36     * an actual syntax error in the input or indicate that the particular
37     * ombination of grammar and input requires the more powerful
38     * {@see PredictionMode::LL()} prediction abilities to complete successfully.
39     *
40     * This prediction mode does not provide any guarantees for prediction
41     * behavior for syntactically-incorrect inputs.
42     */
43    public const SLL = 0;
44
45    /**
46     * The LL(*) prediction mode. This prediction mode allows the current parser
47     * context to be used for resolving SLL conflicts that occur during
48     * prediction. This is the fastest prediction mode that guarantees correct
49     * parse results for all combinations of grammars with syntactically correct
50     * inputs.
51     *
52     * When using this prediction mode, the parser will make correct decisions
53     * for all syntactically-correct grammar and input combinations. However, in
54     * cases where the grammar is truly ambiguous this prediction mode might not
55     * report a precise answer for exactly which alternatives are ambiguous.
56     *
57     * This prediction mode does not provide any guarantees for prediction
58     * behavior for syntactically-incorrect inputs.
59     */
60    public const LL = 1;
61
62    /**
63     * The LL(*) prediction mode with exact ambiguity detection. In addition to
64     * the correctness guarantees provided by the {@see PredictionMode::LL}
65     * prediction mode, this prediction mode instructs the prediction algorithm
66     * to determine the complete and exact set of ambiguous alternatives for
67     * every ambiguous decision encountered while parsing.
68     *
69     * This prediction mode may be used for diagnosing ambiguities during
70     * grammar development. Due to the performance overhead of calculating sets
71     * of ambiguous alternatives, this prediction mode should be avoided when
72     * the exact results are not necessary.
73     *
74     * This prediction mode does not provide any guarantees for prediction
75     * behavior for syntactically-incorrect inputs.
76     */
77    public const LL_EXACT_AMBIG_DETECTION = 2;
78
79    /**
80     * Computes the SLL prediction termination condition.
81     *
82     * This method computes the SLL prediction termination condition for both of
83     * the following cases.
84     *
85     * - The usual SLL+LL fallback upon SLL conflict
86     * - Pure SLL without LL fallback
87     *
88     * COMBINED SLL+LL PARSING
89     *
90     * When LL-fallback is enabled upon SLL conflict, correct predictions are
91     * ensured regardless of how the termination condition is computed by this
92     * method. Due to the substantially higher cost of LL prediction, the
93     * prediction should only fall back to LL when the additional lookahead
94     * cannot lead to a unique SLL prediction.
95     *
96     * Assuming combined SLL+LL parsing, an SLL configuration set with only
97     * conflicting subsets should fall back to full LL, even if the
98     * configuration sets don't resolve to the same alternative (e.g.
99     * `{1,2}` and `{3,4}`. If there is at least one non-conflicting
100     * configuration, SLL could continue with the hopes that more lookahead will
101     * resolve via one of those non-conflicting configurations.
102     *
103     * Here's the prediction termination rule them: SLL (for SLL+LL parsing)
104     * stops when it sees only conflicting configuration subsets. In contrast,
105     * full LL keeps going when there is uncertainty.
106     *
107     * HEURISTIC
108     *
109     * As a heuristic, we stop prediction when we see any conflicting subset
110     * unless we see a state that only has one alternative associated with it.
111     * The single-alt-state thing lets prediction continue upon rules like
112     * (otherwise, it would admit defeat too soon):
113     *
114     * `[12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;`
115     *
116     * When the ATN simulation reaches the state before `';'`, it has a
117     * DFA state that looks like: `[12|1|[], 6|2|[], 12|2|[]]`. Naturally
118     * `12|1|[]` and `12|2|[]` conflict, but we cannot stop processing this
119     * node because alternative to has another way to continue, via `[6|2|[]]`.
120     *
121     * It also let's us continue for this rule: `[1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;`
122     *
123     * After matching input A, we reach the stop state for rule A, state 1.
124     * State 8 is the state right before B. Clearly alternatives 1 and 2
125     * conflict and no amount of further lookahead will separate the two.
126     * However, alternative 3 will be able to continue and so we do not stop
127     * working on this state. In the previous example, we're concerned with
128     * states associated with the conflicting alternatives. Here alt 3 is not
129     * associated with the conflicting configs, but since we can continue
130     * looking for input reasonably, don't declare the state done.
131     *
132     * PURE SLL PARSING
133     *
134     * To handle pure SLL parsing, all we have to do is make sure that we
135     * combine stack contexts for configurations that differ only by semantic
136     * predicate. From there, we can do the usual SLL termination heuristic.
137     *
138     * PREDICATES IN SLL+LL PARSING
139     *
140     * SLL decisions don't evaluate predicates until after they reach DFA stop
141     * states because they need to create the DFA cache that works in all
142     * semantic situations. In contrast, full LL evaluates predicates collected
143     * during start state computation so it can ignore predicates thereafter.
144     * This means that SLL termination detection can totally ignore semantic
145     * predicates.
146     *
147     * Implementation-wise, {@see ATNConfigSet} combines stack contexts but not
148     * semantic predicate contexts so we might see two configurations like the
149     * following.
150     *
151     * `s, 1, x, {}), (s, 1, x', {p})`
152     *
153     * Before testing these configurations against others, we have to merge
154     * `x` and `x'` (without modifying the existing configurations).
155     * For example, we test `(x+x') === x''` when looking for conflicts in
156     * the following configurations.
157     *
158     * `(s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})`
159     *
160     * If the configuration set has predicates (as indicated by
161     * {@see ATNConfigSet::hasSemanticContext()}), this algorithm makes a copy of
162     * the configurations to strip out all of the predicates so that a standard
163     * {@see ATNConfigSet} will merge everything ignoring predicates.
164     */
165    public static function hasSLLConflictTerminatingPrediction(int $mode, ATNConfigSet $configs) : bool
166    {
167        /* Configs in rule stop states indicate reaching the end of the decision
168         * rule (local context) or end of start rule (full context). If all
169         * configs meet this condition, then none of the configurations is able
170         * to match additional input so we terminate prediction.
171         */
172        if (self::allConfigsInRuleStopStates($configs)) {
173            return true;
174        }
175
176        // pure SLL mode parsing
177        if ($mode === self::SLL) {
178            // Don't bother with combining configs from different semantic
179            // contexts if we can fail over to full LL; costs more time
180            // since we'll often fail over anyway.
181            if ($configs->hasSemanticContext) {
182                // dup configs, tossing out semantic predicates
183                $dup = new ATNConfigSet();
184
185                foreach ($configs->elements() as $c) {
186                    $c = new ATNConfig($c, null, null, SemanticContext::none());
187                    $dup->add($c);
188                }
189
190                $configs = $dup;
191            }
192            // now we have combined contexts for configs with dissimilar preds
193        }
194
195        // pure SLL or combined SLL+LL mode parsing
196        $altsets = self::getConflictingAltSubsets($configs);
197
198        return self::hasConflictingAltSet($altsets) && !self::hasStateAssociatedWithOneAlt($configs);
199    }
200
201    /**
202     * Checks if any configuration in `configs` is in a {@see RuleStopState}.
203     * Configurations meeting this condition have reached the end of the decision
204     * rule (local context) or end of start rule (full context).
205     *
206     * @param ATNConfigSet $configs The configuration set to test.
207     *
208     * @return bool If any configuration in  is in a if any configuration in
209     *              `configs` is in a {@see RuleStopState}, otherwise `false`.
210     */
211    public static function hasConfigInRuleStopState(ATNConfigSet $configs) : bool
212    {
213        foreach ($configs->elements() as $c) {
214            if ($c->state instanceof RuleStopState) {
215                return true;
216            }
217        }
218
219        return false;
220    }
221
222    /**
223     * Checks if all configurations in `configs` are in a {@see RuleStopState}.
224     * Configurations meeting this condition have reached the end of the decision
225     * rule (local context) or end of start rule (full context).
226     *
227     * @param ATNConfigSet $configs the configuration set to test.
228     *
229     * @return bool If all configurations in  are in a if all configurations in
230     *              `configs` are in a {@see RuleStopState}, otherwise `false`.
231     */
232    public static function allConfigsInRuleStopStates(ATNConfigSet $configs) : bool
233    {
234        foreach ($configs->elements() as $c) {
235            if (!$c->state instanceof RuleStopState) {
236                return false;
237            }
238        }
239
240        return true;
241    }
242
243    /**
244     * Full LL prediction termination.
245     *
246     * Can we stop looking ahead during ATN simulation or is there some
247     * uncertainty as to which alternative we will ultimately pick, after
248     * consuming more input? Even if there are partial conflicts, we might know
249     * that everything is going to resolve to the same minimum alternative. That
250     * means we can stop since no more lookahead will change that fact. On the
251     * other hand, there might be multiple conflicts that resolve to different
252     * minimums. That means we need more look ahead to decide which of those
253     * alternatives we should predict.
254     *
255     * The basic idea is to split the set of configurations `C`, into
256     * conflicting subsets `(s, _, ctx, _)` and singleton subsets with
257     * non-conflicting configurations. Two configurations conflict if they have
258     * identical {@see ATNConfig::state()} and {@see ATNConfig::context()} values
259     * but different {@see ATNConfig::alt()} value, e.g. `(s, i, ctx, _)` and
260     * `(s, j, ctx, _)` for `i!=j`.
261     *
262     * Reduce these configuration subsets to the set of possible alternatives.
263     * You can compute the alternative subsets in one pass as follows:
264     *
265     * `A_s,ctx = {i | (s, i, ctx, _)}` for each configuration in `C` holding
266     * `s` and `ctx` fixed.
267     *
268     * Or in pseudo-code, for each configuration `c` in `C`:
269     *
270     *     map[c] U= c.{@see ATNConfig::alt alt} # map hash/equals uses s and x,
271     *     not alt and not pred
272     *
273     * The values in `map` are the set of `A_s,ctx` sets.
274     *
275     * If `|A_s,ctx|=1` then there is no conflict associated with `s` and `ctx`.
276     *
277     * Reduce the subsets to singletons by choosing a minimum of each subset. If
278     * the union of these alternative subsets is a singleton, then no amount of
279     * more lookahead will help us. We will always pick that alternative. If,
280     * however, there is more than one alternative, then we are uncertain which
281     * alternative to predict and must continue looking for resolution. We may
282     * or may not discover an ambiguity in the future, even if there are no
283     * conflicting subsets this round.
284     *
285     * The biggest sin is to terminate early because it means we've made a
286     * decision but were uncertain as to the eventual outcome. We haven't used
287     * enough lookahead. On the other hand, announcing a conflict too late is no
288     * big deal; you will still have the conflict. It's just inefficient. It
289     * might even look until the end of file.
290     *
291     * No special consideration for semantic predicates is required because
292     * predicates are evaluated on-the-fly for full LL prediction, ensuring that
293     * no configuration contains a semantic context during the termination
294     * check.
295     *
296     * CONFLICTING CONFIGS
297     *
298     * Two configurations `(s, i, x)` and `(s, j, x')`, conflict when `i!=j`
299     * but `x=x'`. Because we merge all `(s, i, _)` configurations together,
300     * that means that there are at most `n` configurations associated with
301     * state `s` for `n` possible alternatives in the decision. The merged stacks
302     * complicate the comparison of configuration contexts `x` and `x'`.
303     * Sam checks to see if one is a subset of the other by calling merge and
304     * checking to see if the merged result is either `x` orv`x'`. If the `x`
305     * associated with lowest alternative `i`vis the superset, then `i` is the
306     * only possible prediction since the others resolve to `min(i)` as well.
307     * However, if `x` is associated with `j>i` then at least one stack
308     * configuration for `j` is not in conflict with alternative `i`. The algorithm
309     * should keep going, looking for more lookahead due to the uncertainty.
310     *
311     * For simplicity, I'm doing a equality check between `x` and `x'` that lets
312     * the algorithm continue to consume lookahead longer than necessary. The
313     * reason I like the equality is of course the simplicity but also because
314     * that is the test you need to detect the alternatives that are actually
315     * in conflict.
316     *
317     * CONTINUE/STOP RULE
318     *
319     * Continue if union of resolved alternative sets from non-conflicting and
320     * conflicting alternative subsets has more than one alternative. We are
321     * uncertain about which alternative to predict.
322     *
323     * The complete set of alternatives, `[i for (_,i,_)]`, tells us which
324     * alternatives are still in the running for the amount of input we've
325     * consumed at this point. The conflicting sets let us to strip away
326     * configurations that won't lead to more states because we resolve
327     * conflicts to the configuration with a minimum alternate for the
328     * conflicting set.
329     *
330     * CASES
331     *
332     * - no conflicts and more than 1 alternative in set => continue
333     * - `(s, 1, x)}, `(s, 2, x)`, `(s, 3, z)`, `(s', 1, y)`, `(s', 2, y)`
334     *   yields non-conflicting set `{3}} U conflicting sets `min({1,2})} U
335     *   `min({1,2`)` = `{1,3}` => continue
336     * - `(s, 1, x)}, `(s, 2, x)`, `(s', 1, y)`, `(s', 2, y)`, `(s'', 1, z)`
337     *    yields non-conflicting set `{1}} U conflicting sets `min({1,2})} U
338     *    `min({1,2`)` = `{1`` => stop and predict 1
339     * - `(s, 1, x)}, `(s, 2, x)`, `(s', 1, y)`, `(s', 2, y)` yields conflicting,
340     *    reduced sets `{1`` U `{1}} = `{1`` => stop and predict 1, can announce
341     *    ambiguity `{1,2}`
342     * - `(s, 1, x)}, `(s, 2, x)`, `(s', 2, y)`, `(s', 3, y)` yields conflicting,
343     *    reduced sets `{1`` U `{2}} = `{1,2`` => continue
344     * - `(s, 1, x)}, `(s, 2, x)`, `(s', 3, y)`, `(s', 4, y)` yields conflicting,
345     *    reduced sets `{1`` U `{3}} = `{1,3`` => continue
346     *
347     * EXACT AMBIGUITY DETECTION
348     *
349     * If all states report the same conflicting set of alternatives, then we
350     * know we have the exact ambiguity set.
351     *
352     * `|A_i|>1` and `A_i = A_j` for all i, j.
353     *
354     * In other words, we continue examining lookahead until all `A_i`
355     * have more than one alternative and all `A_i` are the same. If
356     * `A={{1,2}, {1,3}}`, then regular LL prediction would terminate
357     * because the resolved set is `{1}`. To determine what the real
358     * ambiguity is, we have to know whether the ambiguity is between one and
359     * two or one and three so we keep going. We can only stop prediction when
360     * we need exact ambiguity detection when the sets look like
361     * `A={{1,2}}} or `{{1,2},{1,2}``, etc...
362     *
363     * @param array<BitSet> $altsets
364     */
365    public static function resolvesToJustOneViableAlt(array $altsets) : int
366    {
367        return self::getSingleViableAlt($altsets);
368    }
369
370    /**
371     * Determines if every alternative subset in `altsets` contains more
372     * than one alternative.
373     *
374     * @param array<BitSet> $altsets a collection of alternative subsets
375     *
376     * @return bool If every >BitSet in `altsets` {@see BitSet::length()} > 1,
377     *              otherwise `false`.
378     */
379    public static function allSubsetsConflict(array $altsets) : bool
380    {
381        return !self::hasNonConflictingAltSet($altsets);
382    }
383
384    /**
385     * Determines if any single alternative subset in `altsets` contains
386     * exactly one alternative.
387     *
388     * @param array<BitSet> $altsets a collection of alternative subsets
389     *
390     * @return bool `true` if `altsets` contains a {@see BitSet} with
391     *              {@see BitSet::length()} 1, otherwise `false`.
392     */
393    public static function hasNonConflictingAltSet(array $altsets) : bool
394    {
395        foreach ($altsets as $alts) {
396            if ($alts->length() === 1) {
397                return true;
398            }
399        }
400
401        return false;
402    }
403
404    /**
405     * Determines if any single alternative subset in `altsets` contains
406     * more than one alternative.
407     *
408     * @param array<BitSet> $altsets a collection of alternative subsets
409     *
410     * @return bool `true` if `altsets` contains a {@see BitSet} with
411     *              {@see BitSet::length()} > 1, otherwise `false`.
412     */
413    public static function hasConflictingAltSet(array $altsets) : bool
414    {
415        foreach ($altsets as $alts) {
416            if ($alts->length() > 1) {
417                return true;
418            }
419        }
420
421        return false;
422    }
423
424    /**
425     * Determines if every alternative subset in `altsets` is equivalent.
426     *
427     * @param array<BitSet> $altsets a collection of alternative subsets
428     *
429     * @return bool `true` if every member of `altsets` is equal to
430     *              the others, otherwise `false`.
431     */
432    public static function allSubsetsEqual(array $altsets) : bool
433    {
434        $first = null;
435
436        foreach ($altsets as $alts) {
437            if ($first === null) {
438                $first = $alts;
439            } elseif ($alts !== $first) {
440                return false;
441            }
442        }
443
444        return true;
445    }
446
447    /**
448     * Returns the unique alternative predicted by all alternative subsets in
449     * `altsets`. If no such alternative exists, this method returns
450     * {@see ATN::INVALID_ALT_NUMBER}.
451     *
452     * @param array<BitSet> $altsets a collection of alternative subsets
453     */
454    public static function getUniqueAlt(array $altsets) : int
455    {
456        $all = self::getAlts($altsets);
457
458        if ($all->length() === 1) {
459            return $all->minValue();
460        }
461
462        return ATN::INVALID_ALT_NUMBER;
463    }
464
465    /**
466     * Gets the complete set of represented alternatives for a collection of
467     * alternative subsets. This method returns the union of each {@see BitSet}
468     * in `altsets`.
469     *
470     * @param array<BitSet> $altsets a collection of alternative subsets.
471     *
472     * @return BitSet the set of represented alternatives in `altsets`.
473     */
474    public static function getAlts(array $altsets) : BitSet
475    {
476        $all = new BitSet();
477
478        foreach ($altsets as $alts) {
479            $all->or($alts);
480        }
481
482        return $all;
483    }
484
485    /**
486     * This function gets the conflicting alt subsets from a configuration set.
487     * For each configuration `c` in `configs`:
488     *
489     *     map[c] U= c.{@see ATNConfig::$alt} # map hash/equals uses s and x,
490     *     not alt and not pred
491     *
492     * @return array<BitSet>
493     */
494    public static function getConflictingAltSubsets(ATNConfigSet $configs) : array
495    {
496        $configToAlts = new Map(new class implements Equivalence {
497            public function equals(object $other) : bool
498            {
499                return $this instanceof self;
500            }
501
502            public function equivalent(Hashable $left, Hashable $right) : bool
503            {
504                return $left instanceof ATNConfig
505                    && $right instanceof ATNConfig
506                    && $left->state->stateNumber === $right->state->stateNumber
507                    && Equality::equals($left->context, $right->context);
508            }
509
510            public function hash(Hashable $value) : int
511            {
512                if (!$value instanceof ATNConfig) {
513                    throw new \InvalidArgumentException('Unsupported value.');
514                }
515
516                return Hasher::hash($value->state->stateNumber, $value->context);
517            }
518        });
519
520        foreach ($configs->elements() as $cfg) {
521            $alts = $configToAlts->get($cfg);
522
523            if ($alts === null) {
524                $alts = new BitSet();
525                $configToAlts->put($cfg, $alts);
526            }
527
528            $alts->add($cfg->alt);
529        }
530
531        return $configToAlts->getValues();
532    }
533
534    /**
535     * Get a map from state to alt subset from a configuration set. For each
536     * configuration `c` in `configs`:
537     *
538     *     map[c.{@see ATNConfig::$state}] U= c.{@see ATNConfig::$alt}
539     */
540    public static function getStateToAltMap(ATNConfigSet $configs) : Map
541    {
542        $m = new Map();
543
544        foreach ($configs->elements() as $c) {
545            $alts = $m->get($c->state);
546
547            if ($alts === null) {
548                $alts = new BitSet();
549                $m->put($c->state, $alts);
550            }
551
552            $alts->add($c->alt);
553        }
554
555        return $m;
556    }
557
558    public static function hasStateAssociatedWithOneAlt(ATNConfigSet $configs) : bool
559    {
560        foreach (self::getStateToAltMap($configs)->getValues() as $value) {
561            if ($value instanceof BitSet && $value->length() === 1) {
562                return true;
563            }
564        }
565
566        return false;
567    }
568
569    /**
570     * @param array<BitSet> $altsets
571     */
572    public static function getSingleViableAlt(array $altsets) : int
573    {
574        $result = 0;
575
576        foreach ($altsets as $alts) {
577            $minAlt = $alts->minValue();
578
579            if ($result === 0) {
580                $result = (int) $minAlt;
581            } elseif ($result !== $minAlt) {
582                // more than 1 viable alt
583                return ATN::INVALID_ALT_NUMBER;
584            }
585        }
586
587        return $result;
588    }
589}
590