1*37748cd8SNickeau<?php 2*37748cd8SNickeau 3*37748cd8SNickeaudeclare(strict_types=1); 4*37748cd8SNickeau 5*37748cd8SNickeaunamespace Antlr\Antlr4\Runtime\Atn; 6*37748cd8SNickeau 7*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\SemanticContexts\SemanticContext; 8*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Atn\States\RuleStopState; 9*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Equality; 10*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Equivalence; 11*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Hashable; 12*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Comparison\Hasher; 13*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\BitSet; 14*37748cd8SNickeauuse Antlr\Antlr4\Runtime\Utils\Map; 15*37748cd8SNickeau 16*37748cd8SNickeau/** 17*37748cd8SNickeau * This enumeration defines the prediction modes available in ANTLR 4 along with 18*37748cd8SNickeau * utility methods for analyzing configuration sets for conflicts and/or 19*37748cd8SNickeau * ambiguities. 20*37748cd8SNickeau */ 21*37748cd8SNickeaufinal class PredictionMode 22*37748cd8SNickeau{ 23*37748cd8SNickeau /** 24*37748cd8SNickeau * The SLL(*) prediction mode. This prediction mode ignores the current 25*37748cd8SNickeau * parser context when making predictions. This is the fastest prediction 26*37748cd8SNickeau * mode, and provides correct results for many grammars. This prediction 27*37748cd8SNickeau * mode is more powerful than the prediction mode provided by ANTLR 3, but 28*37748cd8SNickeau * may result in syntax errors for grammar and input combinations which are 29*37748cd8SNickeau * not SLL. 30*37748cd8SNickeau * 31*37748cd8SNickeau * When using this prediction mode, the parser will either return a correct 32*37748cd8SNickeau * parse tree (i.e. the same parse tree that would be returned with the 33*37748cd8SNickeau * {@see PredictionMode::LL()} prediction mode), or it will report a syntax 34*37748cd8SNickeau * error. If a syntax error is encountered when using the 35*37748cd8SNickeau * {@see PredictionMode::SLL()} prediction mode, it may be due to either 36*37748cd8SNickeau * an actual syntax error in the input or indicate that the particular 37*37748cd8SNickeau * ombination of grammar and input requires the more powerful 38*37748cd8SNickeau * {@see PredictionMode::LL()} prediction abilities to complete successfully. 39*37748cd8SNickeau * 40*37748cd8SNickeau * This prediction mode does not provide any guarantees for prediction 41*37748cd8SNickeau * behavior for syntactically-incorrect inputs. 42*37748cd8SNickeau */ 43*37748cd8SNickeau public const SLL = 0; 44*37748cd8SNickeau 45*37748cd8SNickeau /** 46*37748cd8SNickeau * The LL(*) prediction mode. This prediction mode allows the current parser 47*37748cd8SNickeau * context to be used for resolving SLL conflicts that occur during 48*37748cd8SNickeau * prediction. This is the fastest prediction mode that guarantees correct 49*37748cd8SNickeau * parse results for all combinations of grammars with syntactically correct 50*37748cd8SNickeau * inputs. 51*37748cd8SNickeau * 52*37748cd8SNickeau * When using this prediction mode, the parser will make correct decisions 53*37748cd8SNickeau * for all syntactically-correct grammar and input combinations. However, in 54*37748cd8SNickeau * cases where the grammar is truly ambiguous this prediction mode might not 55*37748cd8SNickeau * report a precise answer for exactly which alternatives are ambiguous. 56*37748cd8SNickeau * 57*37748cd8SNickeau * This prediction mode does not provide any guarantees for prediction 58*37748cd8SNickeau * behavior for syntactically-incorrect inputs. 59*37748cd8SNickeau */ 60*37748cd8SNickeau public const LL = 1; 61*37748cd8SNickeau 62*37748cd8SNickeau /** 63*37748cd8SNickeau * The LL(*) prediction mode with exact ambiguity detection. In addition to 64*37748cd8SNickeau * the correctness guarantees provided by the {@see PredictionMode::LL} 65*37748cd8SNickeau * prediction mode, this prediction mode instructs the prediction algorithm 66*37748cd8SNickeau * to determine the complete and exact set of ambiguous alternatives for 67*37748cd8SNickeau * every ambiguous decision encountered while parsing. 68*37748cd8SNickeau * 69*37748cd8SNickeau * This prediction mode may be used for diagnosing ambiguities during 70*37748cd8SNickeau * grammar development. Due to the performance overhead of calculating sets 71*37748cd8SNickeau * of ambiguous alternatives, this prediction mode should be avoided when 72*37748cd8SNickeau * the exact results are not necessary. 73*37748cd8SNickeau * 74*37748cd8SNickeau * This prediction mode does not provide any guarantees for prediction 75*37748cd8SNickeau * behavior for syntactically-incorrect inputs. 76*37748cd8SNickeau */ 77*37748cd8SNickeau public const LL_EXACT_AMBIG_DETECTION = 2; 78*37748cd8SNickeau 79*37748cd8SNickeau /** 80*37748cd8SNickeau * Computes the SLL prediction termination condition. 81*37748cd8SNickeau * 82*37748cd8SNickeau * This method computes the SLL prediction termination condition for both of 83*37748cd8SNickeau * the following cases. 84*37748cd8SNickeau * 85*37748cd8SNickeau * - The usual SLL+LL fallback upon SLL conflict 86*37748cd8SNickeau * - Pure SLL without LL fallback 87*37748cd8SNickeau * 88*37748cd8SNickeau * COMBINED SLL+LL PARSING 89*37748cd8SNickeau * 90*37748cd8SNickeau * When LL-fallback is enabled upon SLL conflict, correct predictions are 91*37748cd8SNickeau * ensured regardless of how the termination condition is computed by this 92*37748cd8SNickeau * method. Due to the substantially higher cost of LL prediction, the 93*37748cd8SNickeau * prediction should only fall back to LL when the additional lookahead 94*37748cd8SNickeau * cannot lead to a unique SLL prediction. 95*37748cd8SNickeau * 96*37748cd8SNickeau * Assuming combined SLL+LL parsing, an SLL configuration set with only 97*37748cd8SNickeau * conflicting subsets should fall back to full LL, even if the 98*37748cd8SNickeau * configuration sets don't resolve to the same alternative (e.g. 99*37748cd8SNickeau * `{1,2}` and `{3,4}`. If there is at least one non-conflicting 100*37748cd8SNickeau * configuration, SLL could continue with the hopes that more lookahead will 101*37748cd8SNickeau * resolve via one of those non-conflicting configurations. 102*37748cd8SNickeau * 103*37748cd8SNickeau * Here's the prediction termination rule them: SLL (for SLL+LL parsing) 104*37748cd8SNickeau * stops when it sees only conflicting configuration subsets. In contrast, 105*37748cd8SNickeau * full LL keeps going when there is uncertainty. 106*37748cd8SNickeau * 107*37748cd8SNickeau * HEURISTIC 108*37748cd8SNickeau * 109*37748cd8SNickeau * As a heuristic, we stop prediction when we see any conflicting subset 110*37748cd8SNickeau * unless we see a state that only has one alternative associated with it. 111*37748cd8SNickeau * The single-alt-state thing lets prediction continue upon rules like 112*37748cd8SNickeau * (otherwise, it would admit defeat too soon): 113*37748cd8SNickeau * 114*37748cd8SNickeau * `[12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;` 115*37748cd8SNickeau * 116*37748cd8SNickeau * When the ATN simulation reaches the state before `';'`, it has a 117*37748cd8SNickeau * DFA state that looks like: `[12|1|[], 6|2|[], 12|2|[]]`. Naturally 118*37748cd8SNickeau * `12|1|[]` and `12|2|[]` conflict, but we cannot stop processing this 119*37748cd8SNickeau * node because alternative to has another way to continue, via `[6|2|[]]`. 120*37748cd8SNickeau * 121*37748cd8SNickeau * It also let's us continue for this rule: `[1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;` 122*37748cd8SNickeau * 123*37748cd8SNickeau * After matching input A, we reach the stop state for rule A, state 1. 124*37748cd8SNickeau * State 8 is the state right before B. Clearly alternatives 1 and 2 125*37748cd8SNickeau * conflict and no amount of further lookahead will separate the two. 126*37748cd8SNickeau * However, alternative 3 will be able to continue and so we do not stop 127*37748cd8SNickeau * working on this state. In the previous example, we're concerned with 128*37748cd8SNickeau * states associated with the conflicting alternatives. Here alt 3 is not 129*37748cd8SNickeau * associated with the conflicting configs, but since we can continue 130*37748cd8SNickeau * looking for input reasonably, don't declare the state done. 131*37748cd8SNickeau * 132*37748cd8SNickeau * PURE SLL PARSING 133*37748cd8SNickeau * 134*37748cd8SNickeau * To handle pure SLL parsing, all we have to do is make sure that we 135*37748cd8SNickeau * combine stack contexts for configurations that differ only by semantic 136*37748cd8SNickeau * predicate. From there, we can do the usual SLL termination heuristic. 137*37748cd8SNickeau * 138*37748cd8SNickeau * PREDICATES IN SLL+LL PARSING 139*37748cd8SNickeau * 140*37748cd8SNickeau * SLL decisions don't evaluate predicates until after they reach DFA stop 141*37748cd8SNickeau * states because they need to create the DFA cache that works in all 142*37748cd8SNickeau * semantic situations. In contrast, full LL evaluates predicates collected 143*37748cd8SNickeau * during start state computation so it can ignore predicates thereafter. 144*37748cd8SNickeau * This means that SLL termination detection can totally ignore semantic 145*37748cd8SNickeau * predicates. 146*37748cd8SNickeau * 147*37748cd8SNickeau * Implementation-wise, {@see ATNConfigSet} combines stack contexts but not 148*37748cd8SNickeau * semantic predicate contexts so we might see two configurations like the 149*37748cd8SNickeau * following. 150*37748cd8SNickeau * 151*37748cd8SNickeau * `s, 1, x, {}), (s, 1, x', {p})` 152*37748cd8SNickeau * 153*37748cd8SNickeau * Before testing these configurations against others, we have to merge 154*37748cd8SNickeau * `x` and `x'` (without modifying the existing configurations). 155*37748cd8SNickeau * For example, we test `(x+x') === x''` when looking for conflicts in 156*37748cd8SNickeau * the following configurations. 157*37748cd8SNickeau * 158*37748cd8SNickeau * `(s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})` 159*37748cd8SNickeau * 160*37748cd8SNickeau * If the configuration set has predicates (as indicated by 161*37748cd8SNickeau * {@see ATNConfigSet::hasSemanticContext()}), this algorithm makes a copy of 162*37748cd8SNickeau * the configurations to strip out all of the predicates so that a standard 163*37748cd8SNickeau * {@see ATNConfigSet} will merge everything ignoring predicates. 164*37748cd8SNickeau */ 165*37748cd8SNickeau public static function hasSLLConflictTerminatingPrediction(int $mode, ATNConfigSet $configs) : bool 166*37748cd8SNickeau { 167*37748cd8SNickeau /* Configs in rule stop states indicate reaching the end of the decision 168*37748cd8SNickeau * rule (local context) or end of start rule (full context). If all 169*37748cd8SNickeau * configs meet this condition, then none of the configurations is able 170*37748cd8SNickeau * to match additional input so we terminate prediction. 171*37748cd8SNickeau */ 172*37748cd8SNickeau if (self::allConfigsInRuleStopStates($configs)) { 173*37748cd8SNickeau return true; 174*37748cd8SNickeau } 175*37748cd8SNickeau 176*37748cd8SNickeau // pure SLL mode parsing 177*37748cd8SNickeau if ($mode === self::SLL) { 178*37748cd8SNickeau // Don't bother with combining configs from different semantic 179*37748cd8SNickeau // contexts if we can fail over to full LL; costs more time 180*37748cd8SNickeau // since we'll often fail over anyway. 181*37748cd8SNickeau if ($configs->hasSemanticContext) { 182*37748cd8SNickeau // dup configs, tossing out semantic predicates 183*37748cd8SNickeau $dup = new ATNConfigSet(); 184*37748cd8SNickeau 185*37748cd8SNickeau foreach ($configs->elements() as $c) { 186*37748cd8SNickeau $c = new ATNConfig($c, null, null, SemanticContext::none()); 187*37748cd8SNickeau $dup->add($c); 188*37748cd8SNickeau } 189*37748cd8SNickeau 190*37748cd8SNickeau $configs = $dup; 191*37748cd8SNickeau } 192*37748cd8SNickeau // now we have combined contexts for configs with dissimilar preds 193*37748cd8SNickeau } 194*37748cd8SNickeau 195*37748cd8SNickeau // pure SLL or combined SLL+LL mode parsing 196*37748cd8SNickeau $altsets = self::getConflictingAltSubsets($configs); 197*37748cd8SNickeau 198*37748cd8SNickeau return self::hasConflictingAltSet($altsets) && !self::hasStateAssociatedWithOneAlt($configs); 199*37748cd8SNickeau } 200*37748cd8SNickeau 201*37748cd8SNickeau /** 202*37748cd8SNickeau * Checks if any configuration in `configs` is in a {@see RuleStopState}. 203*37748cd8SNickeau * Configurations meeting this condition have reached the end of the decision 204*37748cd8SNickeau * rule (local context) or end of start rule (full context). 205*37748cd8SNickeau * 206*37748cd8SNickeau * @param ATNConfigSet $configs The configuration set to test. 207*37748cd8SNickeau * 208*37748cd8SNickeau * @return bool If any configuration in is in a if any configuration in 209*37748cd8SNickeau * `configs` is in a {@see RuleStopState}, otherwise `false`. 210*37748cd8SNickeau */ 211*37748cd8SNickeau public static function hasConfigInRuleStopState(ATNConfigSet $configs) : bool 212*37748cd8SNickeau { 213*37748cd8SNickeau foreach ($configs->elements() as $c) { 214*37748cd8SNickeau if ($c->state instanceof RuleStopState) { 215*37748cd8SNickeau return true; 216*37748cd8SNickeau } 217*37748cd8SNickeau } 218*37748cd8SNickeau 219*37748cd8SNickeau return false; 220*37748cd8SNickeau } 221*37748cd8SNickeau 222*37748cd8SNickeau /** 223*37748cd8SNickeau * Checks if all configurations in `configs` are in a {@see RuleStopState}. 224*37748cd8SNickeau * Configurations meeting this condition have reached the end of the decision 225*37748cd8SNickeau * rule (local context) or end of start rule (full context). 226*37748cd8SNickeau * 227*37748cd8SNickeau * @param ATNConfigSet $configs the configuration set to test. 228*37748cd8SNickeau * 229*37748cd8SNickeau * @return bool If all configurations in are in a if all configurations in 230*37748cd8SNickeau * `configs` are in a {@see RuleStopState}, otherwise `false`. 231*37748cd8SNickeau */ 232*37748cd8SNickeau public static function allConfigsInRuleStopStates(ATNConfigSet $configs) : bool 233*37748cd8SNickeau { 234*37748cd8SNickeau foreach ($configs->elements() as $c) { 235*37748cd8SNickeau if (!$c->state instanceof RuleStopState) { 236*37748cd8SNickeau return false; 237*37748cd8SNickeau } 238*37748cd8SNickeau } 239*37748cd8SNickeau 240*37748cd8SNickeau return true; 241*37748cd8SNickeau } 242*37748cd8SNickeau 243*37748cd8SNickeau /** 244*37748cd8SNickeau * Full LL prediction termination. 245*37748cd8SNickeau * 246*37748cd8SNickeau * Can we stop looking ahead during ATN simulation or is there some 247*37748cd8SNickeau * uncertainty as to which alternative we will ultimately pick, after 248*37748cd8SNickeau * consuming more input? Even if there are partial conflicts, we might know 249*37748cd8SNickeau * that everything is going to resolve to the same minimum alternative. That 250*37748cd8SNickeau * means we can stop since no more lookahead will change that fact. On the 251*37748cd8SNickeau * other hand, there might be multiple conflicts that resolve to different 252*37748cd8SNickeau * minimums. That means we need more look ahead to decide which of those 253*37748cd8SNickeau * alternatives we should predict. 254*37748cd8SNickeau * 255*37748cd8SNickeau * The basic idea is to split the set of configurations `C`, into 256*37748cd8SNickeau * conflicting subsets `(s, _, ctx, _)` and singleton subsets with 257*37748cd8SNickeau * non-conflicting configurations. Two configurations conflict if they have 258*37748cd8SNickeau * identical {@see ATNConfig::state()} and {@see ATNConfig::context()} values 259*37748cd8SNickeau * but different {@see ATNConfig::alt()} value, e.g. `(s, i, ctx, _)` and 260*37748cd8SNickeau * `(s, j, ctx, _)` for `i!=j`. 261*37748cd8SNickeau * 262*37748cd8SNickeau * Reduce these configuration subsets to the set of possible alternatives. 263*37748cd8SNickeau * You can compute the alternative subsets in one pass as follows: 264*37748cd8SNickeau * 265*37748cd8SNickeau * `A_s,ctx = {i | (s, i, ctx, _)}` for each configuration in `C` holding 266*37748cd8SNickeau * `s` and `ctx` fixed. 267*37748cd8SNickeau * 268*37748cd8SNickeau * Or in pseudo-code, for each configuration `c` in `C`: 269*37748cd8SNickeau * 270*37748cd8SNickeau * map[c] U= c.{@see ATNConfig::alt alt} # map hash/equals uses s and x, 271*37748cd8SNickeau * not alt and not pred 272*37748cd8SNickeau * 273*37748cd8SNickeau * The values in `map` are the set of `A_s,ctx` sets. 274*37748cd8SNickeau * 275*37748cd8SNickeau * If `|A_s,ctx|=1` then there is no conflict associated with `s` and `ctx`. 276*37748cd8SNickeau * 277*37748cd8SNickeau * Reduce the subsets to singletons by choosing a minimum of each subset. If 278*37748cd8SNickeau * the union of these alternative subsets is a singleton, then no amount of 279*37748cd8SNickeau * more lookahead will help us. We will always pick that alternative. If, 280*37748cd8SNickeau * however, there is more than one alternative, then we are uncertain which 281*37748cd8SNickeau * alternative to predict and must continue looking for resolution. We may 282*37748cd8SNickeau * or may not discover an ambiguity in the future, even if there are no 283*37748cd8SNickeau * conflicting subsets this round. 284*37748cd8SNickeau * 285*37748cd8SNickeau * The biggest sin is to terminate early because it means we've made a 286*37748cd8SNickeau * decision but were uncertain as to the eventual outcome. We haven't used 287*37748cd8SNickeau * enough lookahead. On the other hand, announcing a conflict too late is no 288*37748cd8SNickeau * big deal; you will still have the conflict. It's just inefficient. It 289*37748cd8SNickeau * might even look until the end of file. 290*37748cd8SNickeau * 291*37748cd8SNickeau * No special consideration for semantic predicates is required because 292*37748cd8SNickeau * predicates are evaluated on-the-fly for full LL prediction, ensuring that 293*37748cd8SNickeau * no configuration contains a semantic context during the termination 294*37748cd8SNickeau * check. 295*37748cd8SNickeau * 296*37748cd8SNickeau * CONFLICTING CONFIGS 297*37748cd8SNickeau * 298*37748cd8SNickeau * Two configurations `(s, i, x)` and `(s, j, x')`, conflict when `i!=j` 299*37748cd8SNickeau * but `x=x'`. Because we merge all `(s, i, _)` configurations together, 300*37748cd8SNickeau * that means that there are at most `n` configurations associated with 301*37748cd8SNickeau * state `s` for `n` possible alternatives in the decision. The merged stacks 302*37748cd8SNickeau * complicate the comparison of configuration contexts `x` and `x'`. 303*37748cd8SNickeau * Sam checks to see if one is a subset of the other by calling merge and 304*37748cd8SNickeau * checking to see if the merged result is either `x` orv`x'`. If the `x` 305*37748cd8SNickeau * associated with lowest alternative `i`vis the superset, then `i` is the 306*37748cd8SNickeau * only possible prediction since the others resolve to `min(i)` as well. 307*37748cd8SNickeau * However, if `x` is associated with `j>i` then at least one stack 308*37748cd8SNickeau * configuration for `j` is not in conflict with alternative `i`. The algorithm 309*37748cd8SNickeau * should keep going, looking for more lookahead due to the uncertainty. 310*37748cd8SNickeau * 311*37748cd8SNickeau * For simplicity, I'm doing a equality check between `x` and `x'` that lets 312*37748cd8SNickeau * the algorithm continue to consume lookahead longer than necessary. The 313*37748cd8SNickeau * reason I like the equality is of course the simplicity but also because 314*37748cd8SNickeau * that is the test you need to detect the alternatives that are actually 315*37748cd8SNickeau * in conflict. 316*37748cd8SNickeau * 317*37748cd8SNickeau * CONTINUE/STOP RULE 318*37748cd8SNickeau * 319*37748cd8SNickeau * Continue if union of resolved alternative sets from non-conflicting and 320*37748cd8SNickeau * conflicting alternative subsets has more than one alternative. We are 321*37748cd8SNickeau * uncertain about which alternative to predict. 322*37748cd8SNickeau * 323*37748cd8SNickeau * The complete set of alternatives, `[i for (_,i,_)]`, tells us which 324*37748cd8SNickeau * alternatives are still in the running for the amount of input we've 325*37748cd8SNickeau * consumed at this point. The conflicting sets let us to strip away 326*37748cd8SNickeau * configurations that won't lead to more states because we resolve 327*37748cd8SNickeau * conflicts to the configuration with a minimum alternate for the 328*37748cd8SNickeau * conflicting set. 329*37748cd8SNickeau * 330*37748cd8SNickeau * CASES 331*37748cd8SNickeau * 332*37748cd8SNickeau * - no conflicts and more than 1 alternative in set => continue 333*37748cd8SNickeau * - `(s, 1, x)}, `(s, 2, x)`, `(s, 3, z)`, `(s', 1, y)`, `(s', 2, y)` 334*37748cd8SNickeau * yields non-conflicting set `{3}} U conflicting sets `min({1,2})} U 335*37748cd8SNickeau * `min({1,2`)` = `{1,3}` => continue 336*37748cd8SNickeau * - `(s, 1, x)}, `(s, 2, x)`, `(s', 1, y)`, `(s', 2, y)`, `(s'', 1, z)` 337*37748cd8SNickeau * yields non-conflicting set `{1}} U conflicting sets `min({1,2})} U 338*37748cd8SNickeau * `min({1,2`)` = `{1`` => stop and predict 1 339*37748cd8SNickeau * - `(s, 1, x)}, `(s, 2, x)`, `(s', 1, y)`, `(s', 2, y)` yields conflicting, 340*37748cd8SNickeau * reduced sets `{1`` U `{1}} = `{1`` => stop and predict 1, can announce 341*37748cd8SNickeau * ambiguity `{1,2}` 342*37748cd8SNickeau * - `(s, 1, x)}, `(s, 2, x)`, `(s', 2, y)`, `(s', 3, y)` yields conflicting, 343*37748cd8SNickeau * reduced sets `{1`` U `{2}} = `{1,2`` => continue 344*37748cd8SNickeau * - `(s, 1, x)}, `(s, 2, x)`, `(s', 3, y)`, `(s', 4, y)` yields conflicting, 345*37748cd8SNickeau * reduced sets `{1`` U `{3}} = `{1,3`` => continue 346*37748cd8SNickeau * 347*37748cd8SNickeau * EXACT AMBIGUITY DETECTION 348*37748cd8SNickeau * 349*37748cd8SNickeau * If all states report the same conflicting set of alternatives, then we 350*37748cd8SNickeau * know we have the exact ambiguity set. 351*37748cd8SNickeau * 352*37748cd8SNickeau * `|A_i|>1` and `A_i = A_j` for all i, j. 353*37748cd8SNickeau * 354*37748cd8SNickeau * In other words, we continue examining lookahead until all `A_i` 355*37748cd8SNickeau * have more than one alternative and all `A_i` are the same. If 356*37748cd8SNickeau * `A={{1,2}, {1,3}}`, then regular LL prediction would terminate 357*37748cd8SNickeau * because the resolved set is `{1}`. To determine what the real 358*37748cd8SNickeau * ambiguity is, we have to know whether the ambiguity is between one and 359*37748cd8SNickeau * two or one and three so we keep going. We can only stop prediction when 360*37748cd8SNickeau * we need exact ambiguity detection when the sets look like 361*37748cd8SNickeau * `A={{1,2}}} or `{{1,2},{1,2}``, etc... 362*37748cd8SNickeau * 363*37748cd8SNickeau * @param array<BitSet> $altsets 364*37748cd8SNickeau */ 365*37748cd8SNickeau public static function resolvesToJustOneViableAlt(array $altsets) : int 366*37748cd8SNickeau { 367*37748cd8SNickeau return self::getSingleViableAlt($altsets); 368*37748cd8SNickeau } 369*37748cd8SNickeau 370*37748cd8SNickeau /** 371*37748cd8SNickeau * Determines if every alternative subset in `altsets` contains more 372*37748cd8SNickeau * than one alternative. 373*37748cd8SNickeau * 374*37748cd8SNickeau * @param array<BitSet> $altsets a collection of alternative subsets 375*37748cd8SNickeau * 376*37748cd8SNickeau * @return bool If every >BitSet in `altsets` {@see BitSet::length()} > 1, 377*37748cd8SNickeau * otherwise `false`. 378*37748cd8SNickeau */ 379*37748cd8SNickeau public static function allSubsetsConflict(array $altsets) : bool 380*37748cd8SNickeau { 381*37748cd8SNickeau return !self::hasNonConflictingAltSet($altsets); 382*37748cd8SNickeau } 383*37748cd8SNickeau 384*37748cd8SNickeau /** 385*37748cd8SNickeau * Determines if any single alternative subset in `altsets` contains 386*37748cd8SNickeau * exactly one alternative. 387*37748cd8SNickeau * 388*37748cd8SNickeau * @param array<BitSet> $altsets a collection of alternative subsets 389*37748cd8SNickeau * 390*37748cd8SNickeau * @return bool `true` if `altsets` contains a {@see BitSet} with 391*37748cd8SNickeau * {@see BitSet::length()} 1, otherwise `false`. 392*37748cd8SNickeau */ 393*37748cd8SNickeau public static function hasNonConflictingAltSet(array $altsets) : bool 394*37748cd8SNickeau { 395*37748cd8SNickeau foreach ($altsets as $alts) { 396*37748cd8SNickeau if ($alts->length() === 1) { 397*37748cd8SNickeau return true; 398*37748cd8SNickeau } 399*37748cd8SNickeau } 400*37748cd8SNickeau 401*37748cd8SNickeau return false; 402*37748cd8SNickeau } 403*37748cd8SNickeau 404*37748cd8SNickeau /** 405*37748cd8SNickeau * Determines if any single alternative subset in `altsets` contains 406*37748cd8SNickeau * more than one alternative. 407*37748cd8SNickeau * 408*37748cd8SNickeau * @param array<BitSet> $altsets a collection of alternative subsets 409*37748cd8SNickeau * 410*37748cd8SNickeau * @return bool `true` if `altsets` contains a {@see BitSet} with 411*37748cd8SNickeau * {@see BitSet::length()} > 1, otherwise `false`. 412*37748cd8SNickeau */ 413*37748cd8SNickeau public static function hasConflictingAltSet(array $altsets) : bool 414*37748cd8SNickeau { 415*37748cd8SNickeau foreach ($altsets as $alts) { 416*37748cd8SNickeau if ($alts->length() > 1) { 417*37748cd8SNickeau return true; 418*37748cd8SNickeau } 419*37748cd8SNickeau } 420*37748cd8SNickeau 421*37748cd8SNickeau return false; 422*37748cd8SNickeau } 423*37748cd8SNickeau 424*37748cd8SNickeau /** 425*37748cd8SNickeau * Determines if every alternative subset in `altsets` is equivalent. 426*37748cd8SNickeau * 427*37748cd8SNickeau * @param array<BitSet> $altsets a collection of alternative subsets 428*37748cd8SNickeau * 429*37748cd8SNickeau * @return bool `true` if every member of `altsets` is equal to 430*37748cd8SNickeau * the others, otherwise `false`. 431*37748cd8SNickeau */ 432*37748cd8SNickeau public static function allSubsetsEqual(array $altsets) : bool 433*37748cd8SNickeau { 434*37748cd8SNickeau $first = null; 435*37748cd8SNickeau 436*37748cd8SNickeau foreach ($altsets as $alts) { 437*37748cd8SNickeau if ($first === null) { 438*37748cd8SNickeau $first = $alts; 439*37748cd8SNickeau } elseif ($alts !== $first) { 440*37748cd8SNickeau return false; 441*37748cd8SNickeau } 442*37748cd8SNickeau } 443*37748cd8SNickeau 444*37748cd8SNickeau return true; 445*37748cd8SNickeau } 446*37748cd8SNickeau 447*37748cd8SNickeau /** 448*37748cd8SNickeau * Returns the unique alternative predicted by all alternative subsets in 449*37748cd8SNickeau * `altsets`. If no such alternative exists, this method returns 450*37748cd8SNickeau * {@see ATN::INVALID_ALT_NUMBER}. 451*37748cd8SNickeau * 452*37748cd8SNickeau * @param array<BitSet> $altsets a collection of alternative subsets 453*37748cd8SNickeau */ 454*37748cd8SNickeau public static function getUniqueAlt(array $altsets) : int 455*37748cd8SNickeau { 456*37748cd8SNickeau $all = self::getAlts($altsets); 457*37748cd8SNickeau 458*37748cd8SNickeau if ($all->length() === 1) { 459*37748cd8SNickeau return $all->minValue(); 460*37748cd8SNickeau } 461*37748cd8SNickeau 462*37748cd8SNickeau return ATN::INVALID_ALT_NUMBER; 463*37748cd8SNickeau } 464*37748cd8SNickeau 465*37748cd8SNickeau /** 466*37748cd8SNickeau * Gets the complete set of represented alternatives for a collection of 467*37748cd8SNickeau * alternative subsets. This method returns the union of each {@see BitSet} 468*37748cd8SNickeau * in `altsets`. 469*37748cd8SNickeau * 470*37748cd8SNickeau * @param array<BitSet> $altsets a collection of alternative subsets. 471*37748cd8SNickeau * 472*37748cd8SNickeau * @return BitSet the set of represented alternatives in `altsets`. 473*37748cd8SNickeau */ 474*37748cd8SNickeau public static function getAlts(array $altsets) : BitSet 475*37748cd8SNickeau { 476*37748cd8SNickeau $all = new BitSet(); 477*37748cd8SNickeau 478*37748cd8SNickeau foreach ($altsets as $alts) { 479*37748cd8SNickeau $all->or($alts); 480*37748cd8SNickeau } 481*37748cd8SNickeau 482*37748cd8SNickeau return $all; 483*37748cd8SNickeau } 484*37748cd8SNickeau 485*37748cd8SNickeau /** 486*37748cd8SNickeau * This function gets the conflicting alt subsets from a configuration set. 487*37748cd8SNickeau * For each configuration `c` in `configs`: 488*37748cd8SNickeau * 489*37748cd8SNickeau * map[c] U= c.{@see ATNConfig::$alt} # map hash/equals uses s and x, 490*37748cd8SNickeau * not alt and not pred 491*37748cd8SNickeau * 492*37748cd8SNickeau * @return array<BitSet> 493*37748cd8SNickeau */ 494*37748cd8SNickeau public static function getConflictingAltSubsets(ATNConfigSet $configs) : array 495*37748cd8SNickeau { 496*37748cd8SNickeau $configToAlts = new Map(new class implements Equivalence { 497*37748cd8SNickeau public function equals(object $other) : bool 498*37748cd8SNickeau { 499*37748cd8SNickeau return $this instanceof self; 500*37748cd8SNickeau } 501*37748cd8SNickeau 502*37748cd8SNickeau public function equivalent(Hashable $left, Hashable $right) : bool 503*37748cd8SNickeau { 504*37748cd8SNickeau return $left instanceof ATNConfig 505*37748cd8SNickeau && $right instanceof ATNConfig 506*37748cd8SNickeau && $left->state->stateNumber === $right->state->stateNumber 507*37748cd8SNickeau && Equality::equals($left->context, $right->context); 508*37748cd8SNickeau } 509*37748cd8SNickeau 510*37748cd8SNickeau public function hash(Hashable $value) : int 511*37748cd8SNickeau { 512*37748cd8SNickeau if (!$value instanceof ATNConfig) { 513*37748cd8SNickeau throw new \InvalidArgumentException('Unsupported value.'); 514*37748cd8SNickeau } 515*37748cd8SNickeau 516*37748cd8SNickeau return Hasher::hash($value->state->stateNumber, $value->context); 517*37748cd8SNickeau } 518*37748cd8SNickeau }); 519*37748cd8SNickeau 520*37748cd8SNickeau foreach ($configs->elements() as $cfg) { 521*37748cd8SNickeau $alts = $configToAlts->get($cfg); 522*37748cd8SNickeau 523*37748cd8SNickeau if ($alts === null) { 524*37748cd8SNickeau $alts = new BitSet(); 525*37748cd8SNickeau $configToAlts->put($cfg, $alts); 526*37748cd8SNickeau } 527*37748cd8SNickeau 528*37748cd8SNickeau $alts->add($cfg->alt); 529*37748cd8SNickeau } 530*37748cd8SNickeau 531*37748cd8SNickeau return $configToAlts->getValues(); 532*37748cd8SNickeau } 533*37748cd8SNickeau 534*37748cd8SNickeau /** 535*37748cd8SNickeau * Get a map from state to alt subset from a configuration set. For each 536*37748cd8SNickeau * configuration `c` in `configs`: 537*37748cd8SNickeau * 538*37748cd8SNickeau * map[c.{@see ATNConfig::$state}] U= c.{@see ATNConfig::$alt} 539*37748cd8SNickeau */ 540*37748cd8SNickeau public static function getStateToAltMap(ATNConfigSet $configs) : Map 541*37748cd8SNickeau { 542*37748cd8SNickeau $m = new Map(); 543*37748cd8SNickeau 544*37748cd8SNickeau foreach ($configs->elements() as $c) { 545*37748cd8SNickeau $alts = $m->get($c->state); 546*37748cd8SNickeau 547*37748cd8SNickeau if ($alts === null) { 548*37748cd8SNickeau $alts = new BitSet(); 549*37748cd8SNickeau $m->put($c->state, $alts); 550*37748cd8SNickeau } 551*37748cd8SNickeau 552*37748cd8SNickeau $alts->add($c->alt); 553*37748cd8SNickeau } 554*37748cd8SNickeau 555*37748cd8SNickeau return $m; 556*37748cd8SNickeau } 557*37748cd8SNickeau 558*37748cd8SNickeau public static function hasStateAssociatedWithOneAlt(ATNConfigSet $configs) : bool 559*37748cd8SNickeau { 560*37748cd8SNickeau foreach (self::getStateToAltMap($configs)->getValues() as $value) { 561*37748cd8SNickeau if ($value instanceof BitSet && $value->length() === 1) { 562*37748cd8SNickeau return true; 563*37748cd8SNickeau } 564*37748cd8SNickeau } 565*37748cd8SNickeau 566*37748cd8SNickeau return false; 567*37748cd8SNickeau } 568*37748cd8SNickeau 569*37748cd8SNickeau /** 570*37748cd8SNickeau * @param array<BitSet> $altsets 571*37748cd8SNickeau */ 572*37748cd8SNickeau public static function getSingleViableAlt(array $altsets) : int 573*37748cd8SNickeau { 574*37748cd8SNickeau $result = 0; 575*37748cd8SNickeau 576*37748cd8SNickeau foreach ($altsets as $alts) { 577*37748cd8SNickeau $minAlt = $alts->minValue(); 578*37748cd8SNickeau 579*37748cd8SNickeau if ($result === 0) { 580*37748cd8SNickeau $result = (int) $minAlt; 581*37748cd8SNickeau } elseif ($result !== $minAlt) { 582*37748cd8SNickeau // more than 1 viable alt 583*37748cd8SNickeau return ATN::INVALID_ALT_NUMBER; 584*37748cd8SNickeau } 585*37748cd8SNickeau } 586*37748cd8SNickeau 587*37748cd8SNickeau return $result; 588*37748cd8SNickeau } 589*37748cd8SNickeau} 590