1<?php 2 3declare(strict_types=1); 4 5namespace Antlr\Antlr4\Runtime\Atn; 6 7use Antlr\Antlr4\Runtime\Atn\SemanticContexts\SemanticContext; 8use Antlr\Antlr4\Runtime\Atn\States\RuleStopState; 9use Antlr\Antlr4\Runtime\Comparison\Equality; 10use Antlr\Antlr4\Runtime\Comparison\Equivalence; 11use Antlr\Antlr4\Runtime\Comparison\Hashable; 12use Antlr\Antlr4\Runtime\Comparison\Hasher; 13use Antlr\Antlr4\Runtime\Utils\BitSet; 14use Antlr\Antlr4\Runtime\Utils\Map; 15 16/** 17 * This enumeration defines the prediction modes available in ANTLR 4 along with 18 * utility methods for analyzing configuration sets for conflicts and/or 19 * ambiguities. 20 */ 21final class PredictionMode 22{ 23 /** 24 * The SLL(*) prediction mode. This prediction mode ignores the current 25 * parser context when making predictions. This is the fastest prediction 26 * mode, and provides correct results for many grammars. This prediction 27 * mode is more powerful than the prediction mode provided by ANTLR 3, but 28 * may result in syntax errors for grammar and input combinations which are 29 * not SLL. 30 * 31 * When using this prediction mode, the parser will either return a correct 32 * parse tree (i.e. the same parse tree that would be returned with the 33 * {@see PredictionMode::LL()} prediction mode), or it will report a syntax 34 * error. If a syntax error is encountered when using the 35 * {@see PredictionMode::SLL()} prediction mode, it may be due to either 36 * an actual syntax error in the input or indicate that the particular 37 * ombination of grammar and input requires the more powerful 38 * {@see PredictionMode::LL()} prediction abilities to complete successfully. 39 * 40 * This prediction mode does not provide any guarantees for prediction 41 * behavior for syntactically-incorrect inputs. 42 */ 43 public const SLL = 0; 44 45 /** 46 * The LL(*) prediction mode. This prediction mode allows the current parser 47 * context to be used for resolving SLL conflicts that occur during 48 * prediction. This is the fastest prediction mode that guarantees correct 49 * parse results for all combinations of grammars with syntactically correct 50 * inputs. 51 * 52 * When using this prediction mode, the parser will make correct decisions 53 * for all syntactically-correct grammar and input combinations. However, in 54 * cases where the grammar is truly ambiguous this prediction mode might not 55 * report a precise answer for exactly which alternatives are ambiguous. 56 * 57 * This prediction mode does not provide any guarantees for prediction 58 * behavior for syntactically-incorrect inputs. 59 */ 60 public const LL = 1; 61 62 /** 63 * The LL(*) prediction mode with exact ambiguity detection. In addition to 64 * the correctness guarantees provided by the {@see PredictionMode::LL} 65 * prediction mode, this prediction mode instructs the prediction algorithm 66 * to determine the complete and exact set of ambiguous alternatives for 67 * every ambiguous decision encountered while parsing. 68 * 69 * This prediction mode may be used for diagnosing ambiguities during 70 * grammar development. Due to the performance overhead of calculating sets 71 * of ambiguous alternatives, this prediction mode should be avoided when 72 * the exact results are not necessary. 73 * 74 * This prediction mode does not provide any guarantees for prediction 75 * behavior for syntactically-incorrect inputs. 76 */ 77 public const LL_EXACT_AMBIG_DETECTION = 2; 78 79 /** 80 * Computes the SLL prediction termination condition. 81 * 82 * This method computes the SLL prediction termination condition for both of 83 * the following cases. 84 * 85 * - The usual SLL+LL fallback upon SLL conflict 86 * - Pure SLL without LL fallback 87 * 88 * COMBINED SLL+LL PARSING 89 * 90 * When LL-fallback is enabled upon SLL conflict, correct predictions are 91 * ensured regardless of how the termination condition is computed by this 92 * method. Due to the substantially higher cost of LL prediction, the 93 * prediction should only fall back to LL when the additional lookahead 94 * cannot lead to a unique SLL prediction. 95 * 96 * Assuming combined SLL+LL parsing, an SLL configuration set with only 97 * conflicting subsets should fall back to full LL, even if the 98 * configuration sets don't resolve to the same alternative (e.g. 99 * `{1,2}` and `{3,4}`. If there is at least one non-conflicting 100 * configuration, SLL could continue with the hopes that more lookahead will 101 * resolve via one of those non-conflicting configurations. 102 * 103 * Here's the prediction termination rule them: SLL (for SLL+LL parsing) 104 * stops when it sees only conflicting configuration subsets. In contrast, 105 * full LL keeps going when there is uncertainty. 106 * 107 * HEURISTIC 108 * 109 * As a heuristic, we stop prediction when we see any conflicting subset 110 * unless we see a state that only has one alternative associated with it. 111 * The single-alt-state thing lets prediction continue upon rules like 112 * (otherwise, it would admit defeat too soon): 113 * 114 * `[12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;` 115 * 116 * When the ATN simulation reaches the state before `';'`, it has a 117 * DFA state that looks like: `[12|1|[], 6|2|[], 12|2|[]]`. Naturally 118 * `12|1|[]` and `12|2|[]` conflict, but we cannot stop processing this 119 * node because alternative to has another way to continue, via `[6|2|[]]`. 120 * 121 * It also let's us continue for this rule: `[1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;` 122 * 123 * After matching input A, we reach the stop state for rule A, state 1. 124 * State 8 is the state right before B. Clearly alternatives 1 and 2 125 * conflict and no amount of further lookahead will separate the two. 126 * However, alternative 3 will be able to continue and so we do not stop 127 * working on this state. In the previous example, we're concerned with 128 * states associated with the conflicting alternatives. Here alt 3 is not 129 * associated with the conflicting configs, but since we can continue 130 * looking for input reasonably, don't declare the state done. 131 * 132 * PURE SLL PARSING 133 * 134 * To handle pure SLL parsing, all we have to do is make sure that we 135 * combine stack contexts for configurations that differ only by semantic 136 * predicate. From there, we can do the usual SLL termination heuristic. 137 * 138 * PREDICATES IN SLL+LL PARSING 139 * 140 * SLL decisions don't evaluate predicates until after they reach DFA stop 141 * states because they need to create the DFA cache that works in all 142 * semantic situations. In contrast, full LL evaluates predicates collected 143 * during start state computation so it can ignore predicates thereafter. 144 * This means that SLL termination detection can totally ignore semantic 145 * predicates. 146 * 147 * Implementation-wise, {@see ATNConfigSet} combines stack contexts but not 148 * semantic predicate contexts so we might see two configurations like the 149 * following. 150 * 151 * `s, 1, x, {}), (s, 1, x', {p})` 152 * 153 * Before testing these configurations against others, we have to merge 154 * `x` and `x'` (without modifying the existing configurations). 155 * For example, we test `(x+x') === x''` when looking for conflicts in 156 * the following configurations. 157 * 158 * `(s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})` 159 * 160 * If the configuration set has predicates (as indicated by 161 * {@see ATNConfigSet::hasSemanticContext()}), this algorithm makes a copy of 162 * the configurations to strip out all of the predicates so that a standard 163 * {@see ATNConfigSet} will merge everything ignoring predicates. 164 */ 165 public static function hasSLLConflictTerminatingPrediction(int $mode, ATNConfigSet $configs) : bool 166 { 167 /* Configs in rule stop states indicate reaching the end of the decision 168 * rule (local context) or end of start rule (full context). If all 169 * configs meet this condition, then none of the configurations is able 170 * to match additional input so we terminate prediction. 171 */ 172 if (self::allConfigsInRuleStopStates($configs)) { 173 return true; 174 } 175 176 // pure SLL mode parsing 177 if ($mode === self::SLL) { 178 // Don't bother with combining configs from different semantic 179 // contexts if we can fail over to full LL; costs more time 180 // since we'll often fail over anyway. 181 if ($configs->hasSemanticContext) { 182 // dup configs, tossing out semantic predicates 183 $dup = new ATNConfigSet(); 184 185 foreach ($configs->elements() as $c) { 186 $c = new ATNConfig($c, null, null, SemanticContext::none()); 187 $dup->add($c); 188 } 189 190 $configs = $dup; 191 } 192 // now we have combined contexts for configs with dissimilar preds 193 } 194 195 // pure SLL or combined SLL+LL mode parsing 196 $altsets = self::getConflictingAltSubsets($configs); 197 198 return self::hasConflictingAltSet($altsets) && !self::hasStateAssociatedWithOneAlt($configs); 199 } 200 201 /** 202 * Checks if any configuration in `configs` is in a {@see RuleStopState}. 203 * Configurations meeting this condition have reached the end of the decision 204 * rule (local context) or end of start rule (full context). 205 * 206 * @param ATNConfigSet $configs The configuration set to test. 207 * 208 * @return bool If any configuration in is in a if any configuration in 209 * `configs` is in a {@see RuleStopState}, otherwise `false`. 210 */ 211 public static function hasConfigInRuleStopState(ATNConfigSet $configs) : bool 212 { 213 foreach ($configs->elements() as $c) { 214 if ($c->state instanceof RuleStopState) { 215 return true; 216 } 217 } 218 219 return false; 220 } 221 222 /** 223 * Checks if all configurations in `configs` are in a {@see RuleStopState}. 224 * Configurations meeting this condition have reached the end of the decision 225 * rule (local context) or end of start rule (full context). 226 * 227 * @param ATNConfigSet $configs the configuration set to test. 228 * 229 * @return bool If all configurations in are in a if all configurations in 230 * `configs` are in a {@see RuleStopState}, otherwise `false`. 231 */ 232 public static function allConfigsInRuleStopStates(ATNConfigSet $configs) : bool 233 { 234 foreach ($configs->elements() as $c) { 235 if (!$c->state instanceof RuleStopState) { 236 return false; 237 } 238 } 239 240 return true; 241 } 242 243 /** 244 * Full LL prediction termination. 245 * 246 * Can we stop looking ahead during ATN simulation or is there some 247 * uncertainty as to which alternative we will ultimately pick, after 248 * consuming more input? Even if there are partial conflicts, we might know 249 * that everything is going to resolve to the same minimum alternative. That 250 * means we can stop since no more lookahead will change that fact. On the 251 * other hand, there might be multiple conflicts that resolve to different 252 * minimums. That means we need more look ahead to decide which of those 253 * alternatives we should predict. 254 * 255 * The basic idea is to split the set of configurations `C`, into 256 * conflicting subsets `(s, _, ctx, _)` and singleton subsets with 257 * non-conflicting configurations. Two configurations conflict if they have 258 * identical {@see ATNConfig::state()} and {@see ATNConfig::context()} values 259 * but different {@see ATNConfig::alt()} value, e.g. `(s, i, ctx, _)` and 260 * `(s, j, ctx, _)` for `i!=j`. 261 * 262 * Reduce these configuration subsets to the set of possible alternatives. 263 * You can compute the alternative subsets in one pass as follows: 264 * 265 * `A_s,ctx = {i | (s, i, ctx, _)}` for each configuration in `C` holding 266 * `s` and `ctx` fixed. 267 * 268 * Or in pseudo-code, for each configuration `c` in `C`: 269 * 270 * map[c] U= c.{@see ATNConfig::alt alt} # map hash/equals uses s and x, 271 * not alt and not pred 272 * 273 * The values in `map` are the set of `A_s,ctx` sets. 274 * 275 * If `|A_s,ctx|=1` then there is no conflict associated with `s` and `ctx`. 276 * 277 * Reduce the subsets to singletons by choosing a minimum of each subset. If 278 * the union of these alternative subsets is a singleton, then no amount of 279 * more lookahead will help us. We will always pick that alternative. If, 280 * however, there is more than one alternative, then we are uncertain which 281 * alternative to predict and must continue looking for resolution. We may 282 * or may not discover an ambiguity in the future, even if there are no 283 * conflicting subsets this round. 284 * 285 * The biggest sin is to terminate early because it means we've made a 286 * decision but were uncertain as to the eventual outcome. We haven't used 287 * enough lookahead. On the other hand, announcing a conflict too late is no 288 * big deal; you will still have the conflict. It's just inefficient. It 289 * might even look until the end of file. 290 * 291 * No special consideration for semantic predicates is required because 292 * predicates are evaluated on-the-fly for full LL prediction, ensuring that 293 * no configuration contains a semantic context during the termination 294 * check. 295 * 296 * CONFLICTING CONFIGS 297 * 298 * Two configurations `(s, i, x)` and `(s, j, x')`, conflict when `i!=j` 299 * but `x=x'`. Because we merge all `(s, i, _)` configurations together, 300 * that means that there are at most `n` configurations associated with 301 * state `s` for `n` possible alternatives in the decision. The merged stacks 302 * complicate the comparison of configuration contexts `x` and `x'`. 303 * Sam checks to see if one is a subset of the other by calling merge and 304 * checking to see if the merged result is either `x` orv`x'`. If the `x` 305 * associated with lowest alternative `i`vis the superset, then `i` is the 306 * only possible prediction since the others resolve to `min(i)` as well. 307 * However, if `x` is associated with `j>i` then at least one stack 308 * configuration for `j` is not in conflict with alternative `i`. The algorithm 309 * should keep going, looking for more lookahead due to the uncertainty. 310 * 311 * For simplicity, I'm doing a equality check between `x` and `x'` that lets 312 * the algorithm continue to consume lookahead longer than necessary. The 313 * reason I like the equality is of course the simplicity but also because 314 * that is the test you need to detect the alternatives that are actually 315 * in conflict. 316 * 317 * CONTINUE/STOP RULE 318 * 319 * Continue if union of resolved alternative sets from non-conflicting and 320 * conflicting alternative subsets has more than one alternative. We are 321 * uncertain about which alternative to predict. 322 * 323 * The complete set of alternatives, `[i for (_,i,_)]`, tells us which 324 * alternatives are still in the running for the amount of input we've 325 * consumed at this point. The conflicting sets let us to strip away 326 * configurations that won't lead to more states because we resolve 327 * conflicts to the configuration with a minimum alternate for the 328 * conflicting set. 329 * 330 * CASES 331 * 332 * - no conflicts and more than 1 alternative in set => continue 333 * - `(s, 1, x)}, `(s, 2, x)`, `(s, 3, z)`, `(s', 1, y)`, `(s', 2, y)` 334 * yields non-conflicting set `{3}} U conflicting sets `min({1,2})} U 335 * `min({1,2`)` = `{1,3}` => continue 336 * - `(s, 1, x)}, `(s, 2, x)`, `(s', 1, y)`, `(s', 2, y)`, `(s'', 1, z)` 337 * yields non-conflicting set `{1}} U conflicting sets `min({1,2})} U 338 * `min({1,2`)` = `{1`` => stop and predict 1 339 * - `(s, 1, x)}, `(s, 2, x)`, `(s', 1, y)`, `(s', 2, y)` yields conflicting, 340 * reduced sets `{1`` U `{1}} = `{1`` => stop and predict 1, can announce 341 * ambiguity `{1,2}` 342 * - `(s, 1, x)}, `(s, 2, x)`, `(s', 2, y)`, `(s', 3, y)` yields conflicting, 343 * reduced sets `{1`` U `{2}} = `{1,2`` => continue 344 * - `(s, 1, x)}, `(s, 2, x)`, `(s', 3, y)`, `(s', 4, y)` yields conflicting, 345 * reduced sets `{1`` U `{3}} = `{1,3`` => continue 346 * 347 * EXACT AMBIGUITY DETECTION 348 * 349 * If all states report the same conflicting set of alternatives, then we 350 * know we have the exact ambiguity set. 351 * 352 * `|A_i|>1` and `A_i = A_j` for all i, j. 353 * 354 * In other words, we continue examining lookahead until all `A_i` 355 * have more than one alternative and all `A_i` are the same. If 356 * `A={{1,2}, {1,3}}`, then regular LL prediction would terminate 357 * because the resolved set is `{1}`. To determine what the real 358 * ambiguity is, we have to know whether the ambiguity is between one and 359 * two or one and three so we keep going. We can only stop prediction when 360 * we need exact ambiguity detection when the sets look like 361 * `A={{1,2}}} or `{{1,2},{1,2}``, etc... 362 * 363 * @param array<BitSet> $altsets 364 */ 365 public static function resolvesToJustOneViableAlt(array $altsets) : int 366 { 367 return self::getSingleViableAlt($altsets); 368 } 369 370 /** 371 * Determines if every alternative subset in `altsets` contains more 372 * than one alternative. 373 * 374 * @param array<BitSet> $altsets a collection of alternative subsets 375 * 376 * @return bool If every >BitSet in `altsets` {@see BitSet::length()} > 1, 377 * otherwise `false`. 378 */ 379 public static function allSubsetsConflict(array $altsets) : bool 380 { 381 return !self::hasNonConflictingAltSet($altsets); 382 } 383 384 /** 385 * Determines if any single alternative subset in `altsets` contains 386 * exactly one alternative. 387 * 388 * @param array<BitSet> $altsets a collection of alternative subsets 389 * 390 * @return bool `true` if `altsets` contains a {@see BitSet} with 391 * {@see BitSet::length()} 1, otherwise `false`. 392 */ 393 public static function hasNonConflictingAltSet(array $altsets) : bool 394 { 395 foreach ($altsets as $alts) { 396 if ($alts->length() === 1) { 397 return true; 398 } 399 } 400 401 return false; 402 } 403 404 /** 405 * Determines if any single alternative subset in `altsets` contains 406 * more than one alternative. 407 * 408 * @param array<BitSet> $altsets a collection of alternative subsets 409 * 410 * @return bool `true` if `altsets` contains a {@see BitSet} with 411 * {@see BitSet::length()} > 1, otherwise `false`. 412 */ 413 public static function hasConflictingAltSet(array $altsets) : bool 414 { 415 foreach ($altsets as $alts) { 416 if ($alts->length() > 1) { 417 return true; 418 } 419 } 420 421 return false; 422 } 423 424 /** 425 * Determines if every alternative subset in `altsets` is equivalent. 426 * 427 * @param array<BitSet> $altsets a collection of alternative subsets 428 * 429 * @return bool `true` if every member of `altsets` is equal to 430 * the others, otherwise `false`. 431 */ 432 public static function allSubsetsEqual(array $altsets) : bool 433 { 434 $first = null; 435 436 foreach ($altsets as $alts) { 437 if ($first === null) { 438 $first = $alts; 439 } elseif ($alts !== $first) { 440 return false; 441 } 442 } 443 444 return true; 445 } 446 447 /** 448 * Returns the unique alternative predicted by all alternative subsets in 449 * `altsets`. If no such alternative exists, this method returns 450 * {@see ATN::INVALID_ALT_NUMBER}. 451 * 452 * @param array<BitSet> $altsets a collection of alternative subsets 453 */ 454 public static function getUniqueAlt(array $altsets) : int 455 { 456 $all = self::getAlts($altsets); 457 458 if ($all->length() === 1) { 459 return $all->minValue(); 460 } 461 462 return ATN::INVALID_ALT_NUMBER; 463 } 464 465 /** 466 * Gets the complete set of represented alternatives for a collection of 467 * alternative subsets. This method returns the union of each {@see BitSet} 468 * in `altsets`. 469 * 470 * @param array<BitSet> $altsets a collection of alternative subsets. 471 * 472 * @return BitSet the set of represented alternatives in `altsets`. 473 */ 474 public static function getAlts(array $altsets) : BitSet 475 { 476 $all = new BitSet(); 477 478 foreach ($altsets as $alts) { 479 $all->or($alts); 480 } 481 482 return $all; 483 } 484 485 /** 486 * This function gets the conflicting alt subsets from a configuration set. 487 * For each configuration `c` in `configs`: 488 * 489 * map[c] U= c.{@see ATNConfig::$alt} # map hash/equals uses s and x, 490 * not alt and not pred 491 * 492 * @return array<BitSet> 493 */ 494 public static function getConflictingAltSubsets(ATNConfigSet $configs) : array 495 { 496 $configToAlts = new Map(new class implements Equivalence { 497 public function equals(object $other) : bool 498 { 499 return $this instanceof self; 500 } 501 502 public function equivalent(Hashable $left, Hashable $right) : bool 503 { 504 return $left instanceof ATNConfig 505 && $right instanceof ATNConfig 506 && $left->state->stateNumber === $right->state->stateNumber 507 && Equality::equals($left->context, $right->context); 508 } 509 510 public function hash(Hashable $value) : int 511 { 512 if (!$value instanceof ATNConfig) { 513 throw new \InvalidArgumentException('Unsupported value.'); 514 } 515 516 return Hasher::hash($value->state->stateNumber, $value->context); 517 } 518 }); 519 520 foreach ($configs->elements() as $cfg) { 521 $alts = $configToAlts->get($cfg); 522 523 if ($alts === null) { 524 $alts = new BitSet(); 525 $configToAlts->put($cfg, $alts); 526 } 527 528 $alts->add($cfg->alt); 529 } 530 531 return $configToAlts->getValues(); 532 } 533 534 /** 535 * Get a map from state to alt subset from a configuration set. For each 536 * configuration `c` in `configs`: 537 * 538 * map[c.{@see ATNConfig::$state}] U= c.{@see ATNConfig::$alt} 539 */ 540 public static function getStateToAltMap(ATNConfigSet $configs) : Map 541 { 542 $m = new Map(); 543 544 foreach ($configs->elements() as $c) { 545 $alts = $m->get($c->state); 546 547 if ($alts === null) { 548 $alts = new BitSet(); 549 $m->put($c->state, $alts); 550 } 551 552 $alts->add($c->alt); 553 } 554 555 return $m; 556 } 557 558 public static function hasStateAssociatedWithOneAlt(ATNConfigSet $configs) : bool 559 { 560 foreach (self::getStateToAltMap($configs)->getValues() as $value) { 561 if ($value instanceof BitSet && $value->length() === 1) { 562 return true; 563 } 564 } 565 566 return false; 567 } 568 569 /** 570 * @param array<BitSet> $altsets 571 */ 572 public static function getSingleViableAlt(array $altsets) : int 573 { 574 $result = 0; 575 576 foreach ($altsets as $alts) { 577 $minAlt = $alts->minValue(); 578 579 if ($result === 0) { 580 $result = (int) $minAlt; 581 } elseif ($result !== $minAlt) { 582 // more than 1 viable alt 583 return ATN::INVALID_ALT_NUMBER; 584 } 585 } 586 587 return $result; 588 } 589} 590