1<?php 2 3declare(strict_types=1); 4 5namespace Antlr\Antlr4\Runtime\Atn; 6 7use Antlr\Antlr4\Runtime\Atn\SemanticContexts\SemanticContext; 8use Antlr\Antlr4\Runtime\Atn\States\ATNState; 9use Antlr\Antlr4\Runtime\Atn\States\BlockEndState; 10use Antlr\Antlr4\Runtime\Atn\States\BlockStartState; 11use Antlr\Antlr4\Runtime\Atn\States\DecisionState; 12use Antlr\Antlr4\Runtime\Atn\States\RuleStopState; 13use Antlr\Antlr4\Runtime\Atn\States\StarLoopEntryState; 14use Antlr\Antlr4\Runtime\Atn\Transitions\ActionTransition; 15use Antlr\Antlr4\Runtime\Atn\Transitions\EpsilonTransition; 16use Antlr\Antlr4\Runtime\Atn\Transitions\PrecedencePredicateTransition; 17use Antlr\Antlr4\Runtime\Atn\Transitions\PredicateTransition; 18use Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition; 19use Antlr\Antlr4\Runtime\Atn\Transitions\Transition; 20use Antlr\Antlr4\Runtime\Dfa\DFA; 21use Antlr\Antlr4\Runtime\Dfa\DFAState; 22use Antlr\Antlr4\Runtime\Dfa\PredPrediction; 23use Antlr\Antlr4\Runtime\Error\Exceptions\NoViableAltException; 24use Antlr\Antlr4\Runtime\Interval; 25use Antlr\Antlr4\Runtime\IntervalSet; 26use Antlr\Antlr4\Runtime\IntStream; 27use Antlr\Antlr4\Runtime\Parser; 28use Antlr\Antlr4\Runtime\ParserRuleContext; 29use Antlr\Antlr4\Runtime\PredictionContexts\PredictionContext; 30use Antlr\Antlr4\Runtime\PredictionContexts\PredictionContextCache; 31use Antlr\Antlr4\Runtime\PredictionContexts\SingletonPredictionContext; 32use Antlr\Antlr4\Runtime\RuleContext; 33use Antlr\Antlr4\Runtime\Token; 34use Antlr\Antlr4\Runtime\TokenStream; 35use Antlr\Antlr4\Runtime\Utils\BitSet; 36use Antlr\Antlr4\Runtime\Utils\DoubleKeyMap; 37use Antlr\Antlr4\Runtime\Utils\Set; 38use Antlr\Antlr4\Runtime\VocabularyImpl; 39 40/** 41 * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. 42 * 43 * The basic complexity of the adaptive strategy makes it harder to understand. 44 * We begin with ATN simulation to build paths in a DFA. Subsequent prediction 45 * requests go through the DFA first. If they reach a state without an edge for 46 * the current symbol, the algorithm fails over to the ATN simulation to 47 * complete the DFA path for the current input (until it finds a conflict state 48 * or uniquely predicting state). 49 * 50 * All of that is done without using the outer context because we want to create 51 * a DFA that is not dependent upon the rule invocation stack when we do a 52 * prediction. One DFA works in all contexts. We avoid using context not 53 * necessarily because it's slower, although it can be, but because of the DFA 54 * caching problem. The closure routine only considers the rule invocation stack 55 * created during prediction beginning in the decision rule. For example, if 56 * prediction occurs without invoking another rule's ATN, there are no context 57 * stacks in the configurations. When lack of context leads to a conflict, we 58 * don't know if it's an ambiguity or a weakness in the strong LL(*) parsing 59 * strategy (versus full LL(*)). 60 * 61 * When SLL yields a configuration set with conflict, we rewind the input and 62 * retry the ATN simulation, this time using full outer context without adding 63 * to the DFA. Configuration context stacks will be the full invocation stacks 64 * from the start rule. If we get a conflict using full context, then we can 65 * definitively say we have a true ambiguity for that input sequence. If we 66 * don't get a conflict, it implies that the decision is sensitive to the outer 67 * context. (It is not context-sensitive in the sense of context-sensitive 68 * grammars.) 69 * 70 * The next time we reach this DFA state with an SLL conflict, through DFA 71 * simulation, we will again retry the ATN simulation using full context mode. 72 * This is slow because we can't save the results and have to "interpret" the 73 * ATN each time we get that input. 74 * 75 * CACHING FULL CONTEXT PREDICTIONS 76 * 77 * We could cache results from full context to predicted alternative easily and 78 * that saves a lot of time but doesn't work in presence of predicates. The set 79 * of visible predicates from the ATN start state changes depending on the 80 * context, because closure can fall off the end of a rule. I tried to cache 81 * tuples (stack context, semantic context, predicted alt) but it was slower 82 * than interpreting and much more complicated. Also required a huge amount of 83 * memory. The goal is not to create the world's fastest parser anyway. I'd like 84 * to keep this algorithm simple. By launching multiple threads, we can improve 85 * the speed of parsing across a large number of files. 86 * 87 * There is no strict ordering between the amount of input used by SLL vs LL, 88 * which makes it really hard to build a cache for full context. Let's say that 89 * we have input A B C that leads to an SLL conflict with full context X. That 90 * implies that using X we might only use A B but we could also use A B C D to 91 * resolve conflict. Input A B C D could predict alternative 1 in one position 92 * in the input and A B C E could predict alternative 2 in another position in 93 * input. The conflicting SLL configurations could still be non-unique in the 94 * full context prediction, which would lead us to requiring more input than the 95 * original A B C. To make a prediction cache work, we have to track the exact 96 * input used during the previous prediction. That amounts to a cache that maps 97 * X to a specific DFA for that context. 98 * 99 * Something should be done for left-recursive expression predictions. They are 100 * likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry 101 * with full LL thing Sam does. 102 * 103 * AVOIDING FULL CONTEXT PREDICTION 104 * 105 * We avoid doing full context retry when the outer context is empty, we did not 106 * dip into the outer context by falling off the end of the decision state rule, 107 * or when we force SLL mode. 108 * 109 * As an example of the not dip into outer context case, consider as super 110 * constructor calls versus function calls. One grammar might look like 111 * this: 112 * 113 * ctorBody 114 * : '{' superCall? stat* '}' 115 * ; 116 * 117 * 118 * Or, you might see something like 119 * 120 * stat 121 * : superCall ';' 122 * | expression ';' 123 * | ... 124 * ; 125 * 126 * 127 * In both cases I believe that no closure operations will dip into the outer 128 * context. In the first case ctorBody in the worst case will stop at the '}'. 129 * In the 2nd case it should stop at the ';'. Both cases should stay within the 130 * entry rule and not dip into the outer context. 131 * 132 * PREDICATES 133 * 134 * Predicates are always evaluated if present in either SLL or LL both. SLL and 135 * LL simulation deals with predicates differently. SLL collects predicates as 136 * it performs closure operations like ANTLR v3 did. It delays predicate 137 * evaluation until it reaches and accept state. This allows us to cache the SLL 138 * ATN simulation whereas, if we had evaluated predicates on-the-fly during 139 * closure, the DFA state configuration sets would be different and we couldn't 140 * build up a suitable DFA. 141 * 142 * When building a DFA accept state during ATN simulation, we evaluate any 143 * predicates and return the sole semantically valid alternative. If there is 144 * more than 1 alternative, we report an ambiguity. If there are 0 alternatives, 145 * we throw an exception. Alternatives without predicates act like they have 146 * true predicates. The simple way to think about it is to strip away all 147 * alternatives with false predicates and choose the minimum alternative that 148 * remains. 149 * 150 * When we start in the DFA and reach an accept state that's predicated, we test 151 * those and return the minimum semantically viable alternative. If no 152 * alternatives are viable, we throw an exception. 153 * 154 * During full LL ATN simulation, closure always evaluates predicates and 155 * on-the-fly. This is crucial to reducing the configuration set size during 156 * closure. It hits a landmine when parsing with the Java grammar, for example, 157 * without this on-the-fly evaluation. 158 * 159 * SHARING DFA 160 * 161 * All instances of the same parser share the same decision DFAs through a 162 * static field. Each instance gets its own ATN simulator but they share the 163 * same {@see ParserATNSimulator::$decisionToDFA} field. They also share a 164 * {@see PredictionContextCache} object that makes sure that all 165 * {@see PredictionContext} objects are shared among the DFA states. This makes 166 * a big size difference. 167 * 168 * THREAD SAFETY 169 * 170 * The {@see ParserATNSimulator} locks on the {@see ParserATNSimulator::$decisionToDFA} 171 * field when it adds a new DFA object to that array. 172 * {@see ParserATNSimulator::$addDFAEdge} locks on the DFA for the current 173 * decision when setting the {@see DFAState::$edges} field. 174 * {@see ParserATNSimulator::addDFAState()} locks on the DFA for the current 175 * decision when looking up a DFA state to see if it already exists. We must 176 * make sure that all requests to add DFA states that are equivalent result in 177 * the same shared DFA object. This is because lots of threads will be trying 178 * to update the DFA at once. {@see ParserATNSimulator::addDFAState()} also 179 * locks inside the DFA lock but this time on the shared context cache when it 180 * rebuilds the configurations' {@see PredictionContext} objects using cached 181 * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is 182 * safe as long as we can guarantee that all threads referencing 183 * `s.edge[t]` get the same physical target {@see DFAState}, or `null`. Once 184 * into the DFA, the DFA simulation does not reference the {@see DFA::$states} map. 185 * It follows the {@see DFAState::$edges} field to new targets. The DFA simulator 186 * will either find {@see DFAState::$edges} to be `null`, to be non-`null` and 187 * `dfa.edges[t]` null, or `dfa.edges[t]` to be non-null. The 188 * {@see ParserATNSimulator::addDFAEdge()} method could be racing to set the field 189 * but in either case the DFA simulator works; if `null`, and requests ATN 190 * simulation. It could also race trying to get `dfa.edges[t]`, but either 191 * way it will work because it's not doing a test and set operation. 192 * 193 * tarting with SLL then failing to combined SLL/LL (Two-Stage Parsing) 194 * 195 * Sam pointed out that if SLL does not give a syntax error, then there is no 196 * point in doing full LL, which is slower. We only have to try LL if we get a 197 * syntax error. For maximum speed, Sam starts the parser set to pure SLL 198 * mode with the {@see BailErrorStrategy}: 199 * 200 * parser.{@see Parser::getInterpreter()}. 201 * {@see ParserATNSimulator::setPredictionMode()}`(}{@see PredictionMode::$SLL})`; 202 * parser->{@see Parser::setErrorHandler()}(new {@see BailErrorStrategy}()); 203 * 204 * If it does not get a syntax error, then we're done. If it does get a syntax 205 * error, we need to retry with the combined SLL/LL strategy. 206 * 207 * The reason this works is as follows. If there are no SLL conflicts, then the 208 * grammar is SLL (at least for that input set). If there is an SLL conflict, 209 * the full LL analysis must yield a set of viable alternatives which is a 210 * subset of the alternatives reported by SLL. If the LL set is a singleton, 211 * then the grammar is LL but not SLL. If the LL set is the same size as the SLL 212 * set, the decision is SLL. If the LL set has size > 1, then that decision 213 * is truly ambiguous on the current input. If the LL set is smaller, then the 214 * SLL conflict resolution might choose an alternative that the full LL would 215 * rule out as a possibility based upon better context information. If that's 216 * the case, then the SLL parse will definitely get an error because the full LL 217 * analysis says it's not viable. If SLL conflict resolution chooses an 218 * alternative within the LL set, them both SLL and LL would choose the same 219 * alternative because they both choose the minimum of multiple conflicting 220 * alternatives. 221 * 222 * Let's say we have a set of SLL conflicting alternatives `{1, 2, 3}` and 223 * a smaller LL set called s. If s is `{2, 3}`, then SLL parsing will 224 * get an error because SLL will pursue alternative 1. If s is `{1, 2}` or 225 * `{1, 3}` then both SLL and LL will choose the same alternative because 226 * alternative one is the minimum of either set. If s is `{2}` or `{3}` then 227 * SLL will get a syntax error. If s is `{1}` then SLL will succeed. 228 * 229 * Of course, if the input is invalid, then we will get an error for sure in 230 * both SLL and LL parsing. Erroneous input will therefore require 2 passes over 231 * the input. 232 */ 233final class ParserATNSimulator extends ATNSimulator 234{ 235 /** @var bool */ 236 public static $debug = false; 237 238 /** @var bool */ 239 public static $debug_closure = false; 240 241 /** @var bool */ 242 public static $debug_add = false; 243 244 /** @var bool */ 245 public static $debug_list_atn_decisions = false; 246 247 /** @var bool */ 248 public static $dfa_debug = false; 249 250 /** @var bool */ 251 public static $retry_debug = false; 252 253 /** @var array<string> */ 254 public $log = []; 255 256 /** @var Parser */ 257 protected $parser; 258 259 /** @var array<DFA> */ 260 public $decisionToDFA = []; 261 262 /** @var int */ 263 private $mode = PredictionMode::LL; 264 265 /** 266 * Each prediction operation uses a cache for merge of prediction contexts. 267 * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap 268 * isn't synchronized but we're ok since two threads shouldn't reuse same 269 * parser/atnsim object because it can only handle one input at a time. 270 * This maps graphs a and b to merged result c. (a,b)→c. We can avoid 271 * the merge if we ever see a and b again. Note that (b,a)→c should 272 * also be examined during cache lookup. 273 * 274 * @var DoubleKeyMap|null 275 */ 276 protected $mergeCache; 277 278 /** 279 * LAME globals to avoid parameters!!!!! I need these down deep in predTransition. 280 * 281 * @var TokenStream 282 */ 283 protected $input; 284 285 /** @var int */ 286 protected $startIndex = 0; 287 288 /** @var ParserRuleContext|null */ 289 protected $outerContext; 290 291 /** @var DFA|null */ 292 protected $dfa; 293 294 /** 295 * @param array<DFA> $decisionToDFA 296 */ 297 public function __construct( 298 Parser $parser, 299 ATN $atn, 300 array $decisionToDFA, 301 PredictionContextCache $sharedContextCache 302 ) { 303 parent::__construct($atn, $sharedContextCache); 304 305 $this->parser = $parser; 306 $this->decisionToDFA = $decisionToDFA; 307 } 308 309 public function reset() : void 310 { 311 } 312 313 public function clearDFA() : void 314 { 315 for ($d = 0, $count = \count($this->decisionToDFA); $d < $count; $d++) { 316 $decisionState = $this->atn->getDecisionState($d); 317 318 if ($decisionState !== null) { 319 $this->decisionToDFA[$d] = new DFA($decisionState, $d); 320 } 321 } 322 } 323 324 public function adaptivePredict(TokenStream $input, int $decision, ParserRuleContext $outerContext) : int 325 { 326 if (self::$debug || self::$debug_list_atn_decisions) { 327 $token = $input->LT(1); 328 329 $this->log[] = \sprintf( 330 'adaptivePredict decision %d exec LA(1)==%s line %d:%d', 331 $decision, 332 $this->getLookaheadName($input), 333 $token === null ? '' : $token->getLine(), 334 $token === null ? '' : $token->getCharPositionInLine() 335 ); 336 } 337 338 $this->input = $input; 339 $this->startIndex = $input->getIndex(); 340 $this->outerContext = $outerContext; 341 342 $dfa = $this->decisionToDFA[$decision]; 343 $this->dfa = $dfa; 344 345 $m = $input->mark(); 346 $index = $this->startIndex; 347 348 // Now we are certain to have a specific decision's DFA, but do we still need an initial state? 349 try { 350 if ($dfa->isPrecedenceDfa()) { 351 // The start state for a precedence DFA depends on the current 352 // parser precedence, and is provided by a DFA method. 353 354 $s0 = $dfa->getPrecedenceStartState($this->parser->getPrecedence()); 355 } else { 356 // The start state for a "regular" DFA is just s0. 357 $s0 = $dfa->s0; 358 } 359 360 if ($s0 === null) { 361 if (self::$debug || self::$debug_list_atn_decisions) { 362 $this->log[] = \sprintf( 363 'predictATN decision %d exec LA(1)==%s, outerContext=%s', 364 $dfa->decision, 365 $this->getLookaheadName($input), 366 $outerContext->toString($this->parser->getRuleNames()) 367 ); 368 } 369 370 $fullCtx = false; 371 372 if ($dfa->atnStartState === null) { 373 throw new \RuntimeException('ATN Start State cannot be null.'); 374 } 375 376 $s0_closure = $this->computeStartState( 377 $dfa->atnStartState, 378 ParserRuleContext::emptyContext(), 379 $fullCtx 380 ); 381 382 if ($dfa->isPrecedenceDfa()) { 383 /* 384 * If this is a precedence DFA, we use applyPrecedenceFilter 385 * to convert the computed start state to a precedence start 386 * state. We then use DFA.setPrecedenceStartState to set the 387 * appropriate start state for the precedence level rather 388 * than simply setting DFA.s0. 389 */ 390 391 if ($dfa->s0 === null) { 392 throw new \RuntimeException('DFA.s0 cannot be null.'); 393 } 394 395 $dfa->s0->configs = $s0_closure; // not used for prediction but useful to know start configs anyway 396 397 $s0_closure = $this->applyPrecedenceFilter($s0_closure); 398 399 $s0 = $this->addDFAState($dfa, new DFAState($s0_closure)); 400 401 $dfa->setPrecedenceStartState($this->parser->getPrecedence(), $s0); 402 } else { 403 $s0 = $this->addDFAState($dfa, new DFAState($s0_closure)); 404 $dfa->s0 = $s0; 405 } 406 } 407 408 $alt = $this->execATN($dfa, $s0, $input, $index, $outerContext); 409 410 if (self::$debug) { 411 $this->log[] = \sprintf('DFA after predictATN: %s', $dfa->toString($this->parser->getVocabulary())); 412 } 413 414 return $alt ?? 0; 415 } finally { 416 $this->mergeCache = null; // wack cache after each prediction 417 $this->dfa = null; 418 $input->seek($index); 419 $input->release($m); 420 } 421 } 422 423 /** 424 * Performs ATN simulation to compute a predicted alternative based 425 * upon the remaining input, but also updates the DFA cache to avoid 426 * having to traverse the ATN again for the same input sequence. 427 * 428 * There are some key conditions we're looking for after computing a new 429 * set of ATN configs (proposed DFA state): 430 * if the set is empty, there is no viable alternative for current symbol 431 * does the state uniquely predict an alternative? 432 * does the state have a conflict that would prevent us from 433 * putting it on the work list? 434 * 435 * We also have some key operations to do: 436 * - add an edge from previous DFA state to potentially new DFA state, D, 437 * upon current symbol but only if adding to work list, which means in all 438 * cases except no viable alternative (and possibly non-greedy decisions?) 439 * - collecting predicates and adding semantic context to DFA accept states 440 * - adding rule context to context-sensitive DFA accept states 441 * - consuming an input symbol 442 * - reporting a conflict 443 * - reporting an ambiguity 444 * - reporting a context sensitivity 445 * - reporting insufficient predicates 446 * 447 * cover these cases: 448 * - dead end 449 * - single alt 450 * - single alt + preds 451 * - conflict 452 * - conflict + preds 453 */ 454 public function execATN( 455 DFA $dfa, 456 DFAState $s0, 457 TokenStream $input, 458 int $startIndex, 459 ParserRuleContext $outerContext 460 ) : ?int { 461 if (self::$debug || self::$debug_list_atn_decisions) { 462 $token = $input->LT(1); 463 464 $this->log[] = \sprintf( 465 'execATN decision %d exec LA(1)==%s line %d:%d', 466 $dfa->decision, 467 $this->getLookaheadName($input), 468 $token === null ? '' : $token->getLine(), 469 $token === null ? '' : $token->getCharPositionInLine() 470 ); 471 } 472 473 $previousD = $s0; 474 475 if (self::$debug) { 476 $this->log[] = 's0 = ' . $s0; 477 } 478 479 $t = $input->LA(1); 480 481 while (true) { 482 $D = $this->getExistingTargetState($previousD, $t); 483 484 if ($D === null) { 485 $D = $this->computeTargetState($dfa, $previousD, $t); 486 } 487 488 if ($D === null) { 489 throw new \RuntimeException('DFA State cannot be null.'); 490 } 491 492 if ($D === self::error()) { 493 /* If any configs in previous dipped into outer context, that 494 * means that input up to t actually finished entry rule 495 * at least for SLL decision. Full LL doesn't dip into outer 496 * so don't need special case. 497 * We will get an error no matter what so delay until after 498 * decision; better error message. Also, no reachable target 499 * ATN states in SLL implies LL will also get nowhere. 500 * If conflict in states that dip out, choose min since we 501 * will get error no matter what. 502 */ 503 504 $e = $this->noViableAlt($input, $outerContext, $previousD->configs, $startIndex); 505 506 $input->seek($startIndex); 507 508 $alt = $this->getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( 509 $previousD->configs, 510 $outerContext 511 ); 512 513 if ($alt !== ATN::INVALID_ALT_NUMBER) { 514 return $alt; 515 } 516 517 throw $e; 518 } 519 520 if ($D->requiresFullContext && $this->mode !== PredictionMode::SLL) { 521 // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) 522 523 $conflictingAlts = $D->configs->getConflictingAlts(); 524 525 if ($D->predicates !== null) { 526 if (self::$debug) { 527 $this->log[] = 'DFA state has preds in DFA sim LL failover'; 528 } 529 530 $conflictIndex = $input->getIndex(); 531 532 if ($conflictIndex !== $startIndex) { 533 $input->seek($startIndex); 534 } 535 536 $conflictingAlts = $this->evalSemanticContextMany($D->predicates, $outerContext, true); 537 538 if ($conflictingAlts->length() === 1) { 539 if (self::$debug) { 540 $this->log[] = 'Full LL avoided'; 541 } 542 543 return $conflictingAlts->minValue(); 544 } 545 546 if ($conflictIndex !== $startIndex) { 547 // Restore the index so reporting the fallback to full 548 // context occurs with the index at the correct spot 549 550 $input->seek($conflictIndex); 551 } 552 } 553 554 if (self::$dfa_debug) { 555 $this->log[] = \sprintf( 556 'Ctx sensitive state %s in %s', 557 (string) $outerContext, 558 (string) $D 559 ); 560 } 561 562 if ($dfa->atnStartState === null) { 563 throw new \RuntimeException('ATN Start State cannot be null.'); 564 } 565 566 $s0_closure = $this->computeStartState($dfa->atnStartState, $outerContext, true); 567 568 $this->reportAttemptingFullContext( 569 $dfa, 570 $conflictingAlts, 571 $D->configs, 572 $startIndex, 573 $input->getIndex() 574 ); 575 576 return $this->execATNWithFullContext($dfa, $D, $s0_closure, $input, $startIndex, $outerContext); 577 } 578 579 if ($D->isAcceptState) { 580 if ($D->predicates === null) { 581 return $D->prediction; 582 } 583 584 $stopIndex = $input->getIndex(); 585 $input->seek($startIndex); 586 $alts = $this->evalSemanticContextMany($D->predicates, $outerContext, true); 587 588 switch ($alts->length()) { 589 case 0: 590 throw $this->noViableAlt($input, $outerContext, $D->configs, $startIndex); 591 592 case 1: 593 return $alts->minValue(); 594 595 default: 596 // Report ambiguity after predicate evaluation to make sure 597 // the correct set of ambig alts is reported. 598 $this->reportAmbiguity($dfa, $D, $startIndex, $stopIndex, false, $alts, $D->configs); 599 600 return $alts->minValue(); 601 } 602 } 603 604 $previousD = $D; 605 606 if ($t !== IntStream::EOF) { 607 $input->consume(); 608 $t = $input->LA(1); 609 } 610 } 611 } 612 613 /** 614 * Get an existing target state for an edge in the DFA. If the target state 615 * for the edge has not yet been computed or is otherwise not available, 616 * this method returns `null`. 617 * 618 * @param DFAState $previousD The current DFA state 619 * @param int $t The next input symbol 620 * 621 * @return DFAState|null The existing target DFA state for the given input 622 * symbol `t`, or `null` if the target state for 623 * this edge is not already cached. 624 */ 625 public function getExistingTargetState(DFAState $previousD, int $t) : ?DFAState 626 { 627 $edges = $previousD->edges; 628 629 if ($edges === null || $t + 1 < 0 || $t + 1 >= $edges->count()) { 630 return null; 631 } 632 633 return $edges[$t + 1]; 634 } 635 636 /** 637 * Compute a target state for an edge in the DFA, and attempt to add 638 * the computed state and corresponding edge to the DFA. 639 * 640 * @param DFA $dfa The DFA 641 * @param DFAState $previousD The current DFA state 642 * @param int $t The next input symbol 643 * 644 * @return DFAState|null The computed target DFA state for the given input 645 * symbol `t`. If `t` does not lead to a valid DFA 646 * state, this method returns 647 * {@see ParserATNSimulator::error()}. 648 */ 649 public function computeTargetState(DFA $dfa, DFAState $previousD, int $t) : ?DFAState 650 { 651 $reach = $this->computeReachSet($previousD->configs, $t, false); 652 653 if ($reach === null) { 654 $this->addDFAEdge($dfa, $previousD, $t, self::error()); 655 656 return self::error(); 657 } 658 659 // Create new target state; we'll add to DFA after it's complete 660 $D = new DFAState($reach); 661 662 $predictedAlt = self::getUniqueAlt($reach); 663 664 if (self::$debug) { 665 $altSubSets = PredictionMode::getConflictingAltSubsets($reach); 666 667 $this->log[] = \sprintf( 668 'SLL altSubSets=[%s], previous=%s, configs=%s, predict=%d, allSubsetsConflict=%s, conflictingAlts=%s', 669 \implode(', ', $altSubSets), 670 (string) $previousD->configs, 671 (string) $reach, 672 $predictedAlt, 673 PredictionMode::allSubsetsConflict($altSubSets), 674 $this->getConflictingAlts($reach) 675 ); 676 } 677 678 if ($predictedAlt !== ATN::INVALID_ALT_NUMBER) { 679 // NO CONFLICT, UNIQUELY PREDICTED ALT 680 681 $D->isAcceptState = true; 682 $D->configs->uniqueAlt = $predictedAlt; 683 $D->prediction = $predictedAlt; 684 } elseif (PredictionMode::hasSLLConflictTerminatingPrediction($this->mode, $reach)) { 685 // MORE THAN ONE VIABLE ALTERNATIVE 686 687 $D->configs->setConflictingAlts($this->getConflictingAlts($reach)); 688 $D->requiresFullContext = true; 689 690 // in SLL-only mode, we will stop at this state and return the minimum alt 691 $D->isAcceptState = true; 692 693 $conflictingAlts = $D->configs->getConflictingAlts(); 694 695 if ($conflictingAlts === null) { 696 throw new \RuntimeException('Unexpected null conflicting alternatives.'); 697 } 698 699 $D->prediction = $conflictingAlts->minValue(); 700 } 701 702 if ($D->isAcceptState && $D->configs->hasSemanticContext) { 703 $decisionState = $this->atn->getDecisionState($dfa->decision); 704 705 if ($decisionState !== null) { 706 $this->predicateDFAState($D, $decisionState); 707 } 708 709 if ($D->predicates !== null) { 710 $D->prediction = ATN::INVALID_ALT_NUMBER; 711 } 712 } 713 714 // All adds to dfa are done after we've created full D state 715 $D = $this->addDFAEdge($dfa, $previousD, $t, $D); 716 717 return $D; 718 } 719 720 protected function predicateDFAState(DFAState $dfaState, DecisionState $decisionState) : void 721 { 722 // We need to test all predicates, even in DFA states that uniquely predict alternative. 723 $nalts = $decisionState->getNumberOfTransitions(); 724 725 // Update DFA so reach becomes accept state with (predicate,alt) pairs 726 // if preds found for conflicting alts. 727 728 $altsToCollectPredsFrom = $this->getConflictingAltsOrUniqueAlt($dfaState->configs); 729 $altToPred = $altsToCollectPredsFrom === null ? 730 null : 731 $this->getPredsForAmbigAlts($altsToCollectPredsFrom, $dfaState->configs, $nalts); 732 733 if ($altToPred !== null) { 734 $dfaState->predicates = $this->getPredicatePredictions($altsToCollectPredsFrom, $altToPred); 735 $dfaState->prediction = ATN::INVALID_ALT_NUMBER; // make sure we use preds 736 } else { 737 // There are preds in configs but they might go away when 738 // OR'd together like {p}? || NONE == NONE. If neither alt has preds, 739 // resolve to min alt. 740 741 if ($altsToCollectPredsFrom === null) { 742 throw new \RuntimeException('Unexpected null alternatives to collect predicates'); 743 } 744 745 $dfaState->prediction = $altsToCollectPredsFrom->minValue(); 746 } 747 } 748 749 /** 750 * Comes back with reach.uniqueAlt set to a valid alt. 751 */ 752 protected function execATNWithFullContext( 753 DFA $dfa, 754 DFAState $D, // how far we got before failing over 755 ATNConfigSet $s0, 756 TokenStream $input, 757 int $startIndex, 758 ParserRuleContext $outerContext 759 ) : int { 760 if (self::$debug || self::$debug_list_atn_decisions) { 761 $this->log[] = \sprintf('ExecATNWithFullContext %s', (string) $s0); 762 } 763 764 $fullCtx = true; 765 $foundExactAmbig = false; 766 $reach = null; 767 $previous = $s0; 768 $input->seek($startIndex); 769 $t = $input->LA(1); 770 $predictedAlt = 0; 771 772 while (true) { // while more work 773 $reach = $this->computeReachSet($previous, $t, $fullCtx); 774 775 if ($reach === null) { 776 /* I any configs in previous dipped into outer context, that 777 * means that input up to t actually finished entry rule 778 * at least for LL decision. Full LL doesn't dip into outer 779 * so don't need special case. 780 * We will get an error no matter what so delay until after 781 * decision; better error message. Also, no reachable target 782 * ATN states in SLL implies LL will also get nowhere. 783 * If conflict in states that dip out, choose min since we 784 * will get error no matter what. 785 */ 786 787 $e = $this->noViableAlt($input, $outerContext, $previous, $startIndex); 788 789 $input->seek($startIndex); 790 791 $alt = $this->getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule($previous, $outerContext); 792 793 if ($alt !== ATN::INVALID_ALT_NUMBER) { 794 return $alt; 795 } 796 797 throw $e; 798 } 799 800 $altSubSets = PredictionMode::getConflictingAltSubsets($reach); 801 802 if (self::$debug) { 803 $this->log[] = \sprintf( 804 'LL altSubSets=%s, predict=%d, resolvesToJustOneViableAlt=%d', 805 \implode(',', $altSubSets), 806 PredictionMode::getUniqueAlt($altSubSets), 807 PredictionMode::resolvesToJustOneViableAlt($altSubSets) 808 ); 809 } 810 811 $reach->uniqueAlt = self::getUniqueAlt($reach); 812 813 // unique prediction? 814 if ($reach->uniqueAlt !== ATN::INVALID_ALT_NUMBER) { 815 $predictedAlt = $reach->uniqueAlt; 816 817 break; 818 } 819 820 if ($this->mode !== PredictionMode::LL_EXACT_AMBIG_DETECTION) { 821 $predictedAlt = PredictionMode::resolvesToJustOneViableAlt($altSubSets); 822 823 if ($predictedAlt !== ATN::INVALID_ALT_NUMBER) { 824 break; 825 } 826 } else { 827 // In exact ambiguity mode, we never try to terminate early. 828 // Just keeps scarfing until we know what the conflict is 829 if (PredictionMode::allSubsetsConflict($altSubSets) && PredictionMode::allSubsetsEqual($altSubSets)) { 830 $foundExactAmbig = true; 831 $predictedAlt = PredictionMode::getSingleViableAlt($altSubSets); 832 833 break; 834 } 835 836 // Else there are multiple non-conflicting subsets or 837 // we're not sure what the ambiguity is yet. 838 // So, keep going. 839 } 840 841 $previous = $reach; 842 843 if ($t !== IntStream::EOF) { 844 $input->consume(); 845 $t = $input->LA(1); 846 } 847 } 848 849 // If the configuration set uniquely predicts an alternative, 850 // without conflict, then we know that it's a full LL decision not SLL. 851 if ($reach->uniqueAlt !== ATN::INVALID_ALT_NUMBER) { 852 $this->reportContextSensitivity($dfa, $predictedAlt, $reach, $startIndex, $input->getIndex()); 853 854 return $predictedAlt; 855 } 856 857 /* We do not check predicates here because we have checked them on-the-fly 858 * when doing full context prediction. 859 * 860 * In non-exact ambiguity detection mode, we might actually be able to 861 * detect an exact ambiguity, but I'm not going to spend the cycles 862 * needed to check. We only emit ambiguity warnings in exact ambiguity 863 * mode. 864 * 865 * For example, we might know that we have conflicting configurations. 866 * But, that does not mean that there is no way forward without a 867 * conflict. It's possible to have nonconflicting alt subsets as in: 868 * 869 * altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] 870 * 871 * from 872 * [ 873 * (17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), 874 * (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$]) 875 * ] 876 * 877 * In this case, (17,1,[5 $]) indicates there is some next sequence that 878 * would resolve this without conflict to alternative 1. Any other viable 879 * next sequence, however, is associated with a conflict. We stop 880 * looking for input because no amount of further lookahead will alter 881 * the fact that we should predict alternative 1. We just can't say for 882 * sure that there is an ambiguity without looking further. 883 */ 884 885 $this->reportAmbiguity($dfa, $D, $startIndex, $input->getIndex(), $foundExactAmbig, null, $reach); 886 887 return $predictedAlt; 888 } 889 890 protected function computeReachSet(ATNConfigSet $closure, int $t, bool $fullCtx) : ?ATNConfigSet 891 { 892 if (self::$debug) { 893 $this->log[] = \sprintf('In computeReachSet, starting closure: %s', (string) $closure); 894 } 895 896 if ($this->mergeCache === null) { 897 $this->mergeCache = new DoubleKeyMap(); 898 } 899 900 $intermediate = new ATNConfigSet($fullCtx); 901 902 /* 903 * Configurations already in a rule stop state indicate reaching the end 904 * of the decision rule (local context) or end of the start rule (full 905 * context). Once reached, these configurations are never updated by a 906 * closure operation, so they are handled separately for the performance 907 * advantage of having a smaller intermediate set when calling closure. 908 * 909 * For full-context reach operations, separate handling is required to 910 * ensure that the alternative matching the longest overall sequence is 911 * chosen when multiple such configurations can match the input. 912 */ 913 $skippedStopStates = null; 914 915 // First figure out where we can reach on input t 916 foreach ($closure->elements() as $c) { 917 if (self::$debug_add) { 918 $this->log[] = \sprintf('Testing %s at %s', $this->getTokenName($t), (string) $c); 919 } 920 921 if ($c->state instanceof RuleStopState) { 922 if ($c->context !== null && !$c->context->isEmpty()) { 923 throw new \RuntimeException('Context cannot be empty.'); 924 } 925 926 if ($fullCtx || $t === IntStream::EOF) { 927 if ($skippedStopStates === null) { 928 $skippedStopStates = []; 929 } 930 931 $skippedStopStates[] = $c; 932 933 if (self::$debug_add) { 934 $this->log[] = \sprintf('Added %s to skippedStopStates', (string) $c); 935 } 936 } 937 938 continue; 939 } 940 941 foreach ($c->state->getTransitions() as $trans) { 942 $target = $this->getReachableTarget($trans, $t); 943 944 if ($target !== null) { 945 $cfg = new ATNConfig($c, $target); 946 $intermediate->add($cfg, $this->mergeCache); 947 948 if (self::$debug_add) { 949 $this->log[] = \sprintf('Added %s to intermediate', (string) $cfg); 950 } 951 } 952 } 953 } 954 955 // Now figure out where the reach operation can take us... 956 957 $reach = null; 958 959 /* This block optimizes the reach operation for intermediate sets which 960 * trivially indicate a termination state for the overall 961 * adaptivePredict operation. 962 * 963 * The conditions assume that intermediate contains all configurations 964 * relevant to the reach set, but this condition is not true when one 965 * or more configurations have been withheld in skippedStopStates, or 966 * when the current symbol is EOF. 967 */ 968 if ($skippedStopStates === null && $t !== Token::EOF) { 969 if (\count($intermediate->elements()) === 1) { 970 // Don't pursue the closure if there is just one state. 971 // It can only have one alternative; just add to result 972 // Also don't pursue the closure if there is unique alternative 973 // among the configurations. 974 975 $reach = $intermediate; 976 } elseif (self::getUniqueAlt($intermediate) !== ATN::INVALID_ALT_NUMBER) { 977 // Also don't pursue the closure if there is unique alternative among the configurations. 978 $reach = $intermediate; 979 } 980 } 981 982 // If the reach set could not be trivially determined, perform a closure 983 // operation on the intermediate set to compute its initial value. 984 if ($reach === null) { 985 $reach = new ATNConfigSet($fullCtx); 986 $closureBusy = new Set(); 987 $treatEofAsEpsilon = $t === Token::EOF; 988 989 foreach ($intermediate->elements() as $item) { 990 $this->closure($item, $reach, $closureBusy, false, $fullCtx, $treatEofAsEpsilon); 991 } 992 } 993 994 if ($t === IntStream::EOF) { 995 /* After consuming EOF no additional input is possible, so we are 996 * only interested in configurations which reached the end of the 997 * decision rule (local context) or end of the start rule (full 998 * context). Update reach to contain only these configurations. This 999 * handles both explicit EOF transitions in the grammar and implicit 1000 * EOF transitions following the end of the decision or start rule. 1001 * 1002 * When reach==intermediate, no closure operation was performed. In 1003 * this case, removeAllConfigsNotInRuleStopState needs to check for 1004 * reachable rule stop states as well as configurations already in 1005 * a rule stop state. 1006 * 1007 * This is handled before the configurations in skippedStopStates, 1008 * because any configurations potentially added from that list are 1009 * already guaranteed to meet this condition whether or not it's 1010 * required. 1011 */ 1012 1013 $reach = $this->removeAllConfigsNotInRuleStopState($reach, $reach->equals($intermediate)); 1014 } 1015 1016 /* If `skippedStopStates !== null`, then it contains at least one 1017 * configuration. For full-context reach operations, these 1018 * configurations reached the end of the start rule, in which case we 1019 * only add them back to reach if no configuration during the current 1020 * closure operation reached such a state. This ensures adaptivePredict 1021 * chooses an alternative matching the longest overall sequence when 1022 * multiple alternatives are viable.*/ 1023 1024 if ($skippedStopStates !== null && (!$fullCtx || !PredictionMode::hasConfigInRuleStopState($reach))) { 1025 if (\count($skippedStopStates) === 0) { 1026 throw new \RuntimeException('Skipped stop states cannot be empty.'); 1027 } 1028 1029 foreach ($skippedStopStates as $lValue) { 1030 $reach->add($lValue, $this->mergeCache); 1031 } 1032 } 1033 1034 if ($reach->isEmpty()) { 1035 return null; 1036 } 1037 1038 return $reach; 1039 } 1040 1041 /** 1042 * Return a configuration set containing only the configurations from 1043 * `configs` which are in a {@see RuleStopState}. If all configurations in 1044 * `configs` are already in a rule stop state, this method simply returns 1045 * `configs`. 1046 * 1047 * When `lookToEndOfRule` is true, this method uses {@see ATN::nextTokens()} 1048 * for each configuration in `configs` which is not already in a rule stop 1049 * state to see if a rule stop state is reachable from the configuration via 1050 * epsilon-only transitions. 1051 * 1052 * @param ATNConfigSet $configs The configuration set to update 1053 * @param bool $lookToEndOfRule When true, this method checks for 1054 * rule stop states reachable by 1055 * epsilon-only transitions from each 1056 * configuration in `configs`. 1057 * 1058 * @return ATNConfigSet `configs` if all configurations in `configs` are 1059 * in a rule stop state, otherwise return a new 1060 * configuration set containing only the configurations 1061 * from `configs` which are in a rule stop state. 1062 * 1063 * @throws \InvalidArgumentException 1064 */ 1065 protected function removeAllConfigsNotInRuleStopState(ATNConfigSet $configs, bool $lookToEndOfRule) : ATNConfigSet 1066 { 1067 if (PredictionMode::allConfigsInRuleStopStates($configs)) { 1068 return $configs; 1069 } 1070 1071 $result = new ATNConfigSet($configs->fullCtx); 1072 1073 foreach ($configs->elements() as $config) { 1074 if ($config->state instanceof RuleStopState) { 1075 $result->add($config, $this->mergeCache); 1076 1077 continue; 1078 } 1079 1080 if ($lookToEndOfRule && $config->state->onlyHasEpsilonTransitions()) { 1081 $nextTokens = $this->atn->nextTokens($config->state); 1082 1083 if ($nextTokens->contains(Token::EPSILON)) { 1084 $endOfRuleState = $this->atn->ruleToStopState[$config->state->ruleIndex]; 1085 $result->add(new ATNConfig($config, $endOfRuleState), $this->mergeCache); 1086 } 1087 } 1088 } 1089 1090 return $result; 1091 } 1092 1093 protected function computeStartState(ATNState $p, RuleContext $ctx, bool $fullCtx) : ATNConfigSet 1094 { 1095 // Always at least the implicit call to start rule 1096 $initialContext = PredictionContext::fromRuleContext($this->atn, $ctx); 1097 $configs = new ATNConfigSet($fullCtx); 1098 1099 foreach ($p->getTransitions() as $i => $t) { 1100 $c = new ATNConfig(null, $t->target, $initialContext, null, $i + 1); 1101 $closureBusy = new Set(); 1102 1103 $this->closure($c, $configs, $closureBusy, true, $fullCtx, false); 1104 } 1105 1106 return $configs; 1107 } 1108 1109 /* 1110 * parrt internal source braindump that doesn't mess up external API spec. 1111 * 1112 * Context-sensitive in that they can only be properly evaluated in the context 1113 * of the proper prec argument. Without pruning, these predicates are normal 1114 * predicates evaluated when we reach conflict state (or unique prediction). 1115 * As we cannot evaluate these predicates out of context, the resulting 1116 * conflict leads to full LL evaluation and nonlinear prediction which 1117 * shows up very clearly with fairly large expressions. 1118 * 1119 * Example grammar: 1120 * e 1121 * : e '*' e 1122 * | e '+' e 1123 * | INT 1124 * ; 1125 * 1126 * We convert that to the following: 1127 * 1128 * e[int prec] 1129 * : INT ( {3>=prec}? '*' e[4] | {2>=prec}? '+' e[3] )* 1130 * ; 1131 * 1132 * The (..)* loop has a decision for the inner block as well as an enter 1133 * or exit decision, which is what concerns us here. At the 1st + of input 1134 * 1+2+3, the loop entry sees both predicates and the loop exit also sees 1135 * both predicates by falling off the edge of e. This is because we have 1136 * no stack information with SLL and find the follow of e, which will 1137 * hit the return states inside the loop after e[4] and e[3], which brings 1138 * it back to the enter or exit decision. In this case, we know that we 1139 * cannot evaluate those predicates because we have fallen off the edge 1140 * of the stack and will in general not know which prec parameter is 1141 * the right one to use in the predicate. 1142 * 1143 * Because we have special information, that these are precedence predicates, 1144 * we can resolve them without failing over to full LL despite their context 1145 * sensitive nature. We make an assumption that prec[-1] <= prec[0], meaning 1146 * that the current precedence level is greater than or equal to the precedence 1147 * level of recursive invocations above us in the stack. For example, if 1148 * predicate {3>=prec}? is true of the current prec, then one option is to 1149 * enter the loop to match it now. The other option is to exit the loop and 1150 * the left recursive rule to match the current operator in rule invocation 1151 * further up the stack. But, we know that all of those prec are lower or 1152 * the same value and so we can decide to enter the loop instead of matching 1153 * it later. That means we can strip out the other configuration for the exit branch. 1154 * 1155 * So imagine we have (14,1,$,{2>=prec}?) and then 1156 * (14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization allows us to 1157 * collapse these two configurations. We know that if {2>=prec}? is true 1158 * for the current prec parameter, it will also be true for any precfrom 1159 * an invoking e call, indicated by dipsIntoOuterContext. As the predicates 1160 * are both true, we have the option to evaluate them early in the decision 1161 * start state. We do this by stripping both predicates and choosing to 1162 * enter the loop as it is consistent with the notion of operator precedence. 1163 * It's also how the full LL conflict resolution would work. 1164 * 1165 * The solution requires a different DFA start state for each precedence level. 1166 * 1167 * The basic filter mechanism is to remove configurations of the form (p, 2, pi) 1168 * if (p, 1, pi) exists for the same p and pi. In other words, for the same 1169 * ATN state and predicate context, remove any configuration associated with 1170 * an exit branch if there is a configuration associated with the enter branch. 1171 * 1172 * It's also the case that the filter evaluates precedence predicates and 1173 * resolves conflicts according to precedence levels. For example, for input 1174 * 1+2+3 at the first +, we see prediction filtering. 1175 * 1176 * [(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1), 1177 * (11,2,[$],up=1), (14,2,[$],up=1)], hasSemanticContext=true,dipsIntoOuterContext 1178 * 1179 * to 1180 * 1181 * [(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext 1182 * 1183 * This filters because {3>=prec}? evals to true and collapses 1184 * (11,1,[$],{3>=prec}?) and (11,2,[$],up=1) since early conflict 1185 * resolution based upon rules of operator precedence fits with our 1186 * usual match first alt upon conflict. 1187 * 1188 * We noticed a problem where a recursive call resets precedence to 0. 1189 * Sam's fix: each config has flag indicating if it has returned from 1190 * an expr[0] call. then just don't filter any config with that flag set. 1191 * flag is carried along in closure(). so to avoid adding field, set bit 1192 * just under sign bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER). 1193 * With the change you filter "unless (p, 2, pi) was reached after leaving 1194 * the rule stop state of the LR rule containing state p, corresponding 1195 * to a rule invocation with precedence level 0". 1196 */ 1197 1198 /** 1199 * This method transforms the start state computed by 1200 * {@see ParserATNSimulator::computeStartState()} to the special start state 1201 * used by a precedence DFA for a particular precedence value. The transformation 1202 * process applies the following changes to the start state's configuration 1203 * set. 1204 * 1205 * - Evaluate the precedence predicates for each configuration using 1206 * {@see SemanticContext//evalPrecedence}. 1207 * - Remove all configurations which predict an alternative greater than 1208 * 1, for which another configuration that predicts alternative 1 is in the 1209 * same ATN state with the same prediction context. This transformation is 1210 * valid for the following reasons: 1211 * - The closure block cannot contain any epsilon transitions which bypass 1212 * the body of the closure, so all states reachable via alternative 1 are 1213 * part of the precedence alternatives of the transformed left-recursive 1214 * rule. 1215 * - The "primary" portion of a left recursive rule cannot contain an 1216 * epsilon transition, so the only way an alternative other than 1 can exist 1217 * in a state that is also reachable via alternative 1 is by nesting calls 1218 * to the left-recursive rule, with the outer calls not being at the 1219 * preferred precedence level. 1220 * 1221 * The prediction context must be considered by this filter to address 1222 * situations like the following. 1223 * 1224 * grammar TA; 1225 * prog: statement* EOF; 1226 * statement: letterA | statement letterA 'b' ; 1227 * letterA: 'a'; 1228 * 1229 * If the above grammar, the ATN state immediately before the token 1230 * reference `'a'` in `letterA` is reachable from the left edge 1231 * of both the primary and closure blocks of the left-recursive rule 1232 * `statement`. The prediction context associated with each of these 1233 * configurations distinguishes between them, and prevents the alternative 1234 * which stepped out to `prog` (and then back in to `statement` from being 1235 * eliminated by the filter. 1236 * 1237 * @param ATNConfigSet $configs The configuration set computed by 1238 * {@see ParserATNSimulator::computeStartState()} 1239 * as the start state for the DFA. 1240 * 1241 * @return ATNConfigSet The transformed configuration set representing the start state 1242 * for a precedence DFA at a particular precedence level 1243 * (determined by calling {@see Parser::getPrecedence()}). 1244 * 1245 * @throws \InvalidArgumentException 1246 */ 1247 protected function applyPrecedenceFilter(ATNConfigSet $configs) : ATNConfigSet 1248 { 1249 /** @var array<PredictionContext> $statesFromAlt1 */ 1250 $statesFromAlt1 = []; 1251 $configSet = new ATNConfigSet($configs->fullCtx); 1252 1253 foreach ($configs->elements() as $config) { 1254 // handle alt 1 first 1255 if ($config->alt !== 1) { 1256 continue; 1257 } 1258 1259 $updatedContext = $this->outerContext !== null ? 1260 $config->semanticContext->evalPrecedence($this->parser, $this->outerContext) : 1261 null; 1262 1263 if ($updatedContext === null) { 1264 continue; 1265 } 1266 1267 $statesFromAlt1[$config->state->stateNumber] = $config->context; 1268 1269 if (!$updatedContext->equals($config->semanticContext)) { 1270 $configSet->add( 1271 new ATNConfig($config, null, null, $updatedContext), 1272 $this->mergeCache 1273 ); 1274 } else { 1275 $configSet->add($config, $this->mergeCache); 1276 } 1277 } 1278 1279 foreach ($configs->elements() as $config) { 1280 if ($config->alt === 1) { 1281 continue; // already handled 1282 } 1283 1284 /* In the future, this elimination step could be updated to also 1285 * filter the prediction context for alternatives predicting alt>1 1286 * (basically a graph subtraction algorithm). 1287 */ 1288 if (!$config->isPrecedenceFilterSuppressed()) { 1289 $context = $statesFromAlt1[$config->state->stateNumber] ?? null; 1290 1291 if ($context !== null && $config->context !== null && $context->equals($config->context)) { 1292 continue; // eliminated 1293 } 1294 } 1295 1296 $configSet->add($config, $this->mergeCache); 1297 } 1298 1299 return $configSet; 1300 } 1301 1302 protected function getReachableTarget(Transition $trans, int $ttype) : ?ATNState 1303 { 1304 return $trans->matches($ttype, 0, $this->atn->maxTokenType) ? $trans->target : null; 1305 } 1306 1307 /** 1308 * @return array<SemanticContext>|null 1309 */ 1310 protected function getPredsForAmbigAlts(BitSet $ambigAlts, ATNConfigSet $configs, int $nalts) : ?array 1311 { 1312 /* REACH=[1|1|[]|0:0, 1|2|[]|0:1] 1313 * altToPred starts as an array of all null contexts. The entry at index i 1314 * corresponds to alternative i. altToPred[i] may have one of three values: 1315 * 1. null: no ATNConfig c is found such that c.alt==i 1316 * 2. SemanticContext.NONE: At least one ATNConfig c exists such that 1317 * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, 1318 * alt i has at least one unpredicated config. 1319 * 3. Non-NONE Semantic Context: There exists at least one, and for all 1320 * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. 1321 * 1322 * From this, it is clear that NONE||anything==NONE. 1323 */ 1324 1325 $altToPred = new \SplFixedArray($nalts + 1); 1326 1327 foreach ($configs->elements() as $c) { 1328 if ($ambigAlts->contains($c->alt)) { 1329 $altToPred[$c->alt] = SemanticContext::orContext($altToPred[$c->alt] ?? null, $c->semanticContext); 1330 } 1331 } 1332 1333 $nPredAlts = 0; 1334 1335 for ($i = 1; $i <= $nalts; $i++) { 1336 $pred = $altToPred[$i]; 1337 1338 if ($pred === null) { 1339 $altToPred[$i] = SemanticContext::none(); 1340 } elseif ($pred !== SemanticContext::none()) { 1341 $nPredAlts++; 1342 } 1343 } 1344 1345 // nonambig alts are null in altToPred 1346 if ($nPredAlts === 0) { 1347 $altToPred = null; 1348 } else { 1349 $altToPred = $altToPred->toArray(); 1350 } 1351 1352 if (self::$debug) { 1353 $this->log[] = \sprintf( 1354 'getPredsForAmbigAlts result [%s]', 1355 $altToPred === null ? '' : \implode(', ', $altToPred) 1356 ); 1357 } 1358 1359 return $altToPred; 1360 } 1361 1362 /** 1363 * @param array<SemanticContext> $altToPred 1364 * 1365 * @return array<PredPrediction>|null 1366 */ 1367 protected function getPredicatePredictions(?BitSet $ambigAlts, array $altToPred) : ?array 1368 { 1369 $pairs = []; 1370 $containsPredicate = false; 1371 $count = \count($altToPred); 1372 1373 for ($i = 1; $i < $count; $i++) { 1374 $pred = $altToPred[$i]; 1375 1376 // unpredicated is indicated by SemanticContext.NONE 1377 if ($ambigAlts !== null && $ambigAlts->contains($i)) { 1378 $pairs[] = new PredPrediction($pred, $i); 1379 } 1380 1381 if ($pred !== SemanticContext::none()) { 1382 $containsPredicate = true; 1383 } 1384 } 1385 1386 if (!$containsPredicate) { 1387 return null; 1388 } 1389 1390 return $pairs; 1391 } 1392 1393 /** 1394 * This method is used to improve the localization of error messages by 1395 * choosing an alternative rather than throwing a 1396 * {@see NoViableAltException} in particular prediction scenarios where the 1397 * {@see ParserATNSimulator::error()} state was reached during ATN simulation. 1398 * 1399 * The default implementation of this method uses the following 1400 * algorithm to identify an ATN configuration which successfully parsed the 1401 * decision entry rule. Choosing such an alternative ensures that the 1402 * {@see ParserRuleContext} returned by the calling rule will be complete 1403 * and valid, and the syntax error will be reported later at a more 1404 * localized location. 1405 * 1406 * - If a syntactically valid path or paths reach the end of the decision rule and 1407 * they are semantically valid if predicated, return the min associated alt. 1408 * - Else, if a semantically invalid but syntactically valid path exist 1409 * or paths exist, return the minimum associated alt. 1410 * - Otherwise, return {@see ATN::INVALID_ALT_NUMBER}. 1411 * 1412 * In some scenarios, the algorithm described above could predict an 1413 * alternative which will result in a {@see FailedPredicateException} in 1414 * the parser. Specifically, this could occur if the only configuration 1415 * capable of successfully parsing to the end of the decision rule is 1416 * blocked by a semantic predicate. By choosing this alternative within 1417 * {@see ParserATNSimulator::adaptivePredict()} instead of throwing a 1418 * {@see NoViableAltException}, the resulting {@see FailedPredicateException} 1419 * in the parser will identify the specific predicate which is preventing 1420 * the parser from successfully parsing the decision rule, which helps 1421 * developers identify and correct logic errors in semantic predicates. 1422 * 1423 * @param ATNConfigSet $configs The ATN configurations which were valid 1424 * immediately before the 1425 * {@see ParserATNSimulator::error()} 1426 * state was reached. 1427 * @param ParserRuleContext $outerContext The \gamma_0 initial parser context 1428 * from the paper or the parser stack 1429 * at the instant before prediction commences. 1430 * 1431 * @return int The value to return from {@see ParserATNSimulator::adaptivePredict()}, 1432 * or {@see ATN::INVALID_ALT_NUMBER} if a suitable alternative 1433 * was not identified and {@see ParserATNSimulator::::adaptivePredict()} 1434 * should report an error instead. 1435 * 1436 * @throws \Exception 1437 */ 1438 protected function getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( 1439 ATNConfigSet $configs, 1440 ParserRuleContext $outerContext 1441 ) : int { 1442 /** @var ATNConfigSet $semValidConfigs */ 1443 /** @var ATNConfigSet $semInvalidConfigs */ 1444 [$semValidConfigs, $semInvalidConfigs] = $this->splitAccordingToSemanticValidity($configs, $outerContext); 1445 1446 $alt = $this->getAltThatFinishedDecisionEntryRule($semValidConfigs); 1447 1448 if ($alt !== ATN::INVALID_ALT_NUMBER) { 1449 // semantically/syntactically viable path exists 1450 1451 return $alt; 1452 } 1453 1454 // Is there a syntactically valid path with a failed pred? 1455 if ($semInvalidConfigs->getLength() > 0) { 1456 $alt = $this->getAltThatFinishedDecisionEntryRule($semInvalidConfigs); 1457 1458 if ($alt !== ATN::INVALID_ALT_NUMBER) { 1459 // syntactically viable path exists 1460 1461 return $alt; 1462 } 1463 } 1464 1465 return ATN::INVALID_ALT_NUMBER; 1466 } 1467 1468 protected function getAltThatFinishedDecisionEntryRule(ATNConfigSet $configs) : int 1469 { 1470 $alts = new IntervalSet(); 1471 /** @var ATNConfig $c */ 1472 foreach ($configs->elements() as $c) { 1473 if ($c->getOuterContextDepth() > 0 1474 || ($c->state instanceof RuleStopState && $c->context !== null && $c->context->hasEmptyPath())) { 1475 $alts->addOne($c->alt); 1476 } 1477 } 1478 1479 return $alts->length() === 0 ? ATN::INVALID_ALT_NUMBER : $alts->getMinElement(); 1480 } 1481 1482 /** 1483 * Walk the list of configurations and split them according to 1484 * those that have preds evaluating to true/false. If no pred, assume 1485 * true pred and include in succeeded set. Returns Pair of sets. 1486 * 1487 * Create a new set so as not to alter the incoming parameter. 1488 * 1489 * Assumption: the input stream has been restored to the starting point 1490 * prediction, which is where predicates need to evaluate. 1491 * 1492 * @return array<ATNConfigSet> 1493 1494 * @throws \Exception 1495 */ 1496 protected function splitAccordingToSemanticValidity(ATNConfigSet $configs, ParserRuleContext $outerContext) : array 1497 { 1498 $succeeded = new ATNConfigSet($configs->fullCtx); 1499 $failed = new ATNConfigSet($configs->fullCtx); 1500 1501 foreach ($configs->elements() as $c) { 1502 if ($c->semanticContext !== SemanticContext::none()) { 1503 $predicateEvaluationResult = $c->semanticContext->eval($this->parser, $outerContext); 1504 1505 if ($predicateEvaluationResult) { 1506 $succeeded->add($c); 1507 } else { 1508 $failed->add($c); 1509 } 1510 } else { 1511 $succeeded->add($c); 1512 } 1513 } 1514 1515 return [$succeeded, $failed]; 1516 } 1517 1518 /** 1519 * Look through a list of predicate/alt pairs, returning alts for the 1520 * pairs that win. A `NONE` predicate indicates an alt containing an 1521 * unpredicated config which behaves as "always true." If !complete 1522 * then we stop at the first predicate that evaluates to true. This 1523 * includes pairs with null predicates. 1524 * 1525 * @param array<PredPrediction> $predPredictions 1526 */ 1527 protected function evalSemanticContextMany( 1528 array $predPredictions, 1529 ParserRuleContext $outerContext, 1530 bool $complete 1531 ) : BitSet { 1532 $predictions = new BitSet(); 1533 1534 foreach ($predPredictions as $pair) { 1535 if ($pair->pred === SemanticContext::none()) { 1536 $predictions->add($pair->alt); 1537 1538 if (!$complete) { 1539 break; 1540 } 1541 1542 continue; 1543 } 1544 1545 $fullCtx = false; // in dfa 1546 1547 $predicateEvaluationResult = $this->evalSemanticContextOne( 1548 $pair->pred, 1549 $outerContext, 1550 $pair->alt, 1551 $fullCtx 1552 ); 1553 1554 if (self::$debug || self::$dfa_debug) { 1555 $this->log[] = \sprintf('eval pred $pair = %s"', $predicateEvaluationResult); 1556 } 1557 1558 if ($predicateEvaluationResult) { 1559 if (self::$debug || self::$dfa_debug) { 1560 $this->log[] = \sprintf('PREDICT %d', $pair->alt); 1561 } 1562 1563 $predictions->add($pair->alt); 1564 1565 if (!$complete) { 1566 break; 1567 } 1568 } 1569 } 1570 1571 return $predictions; 1572 } 1573 1574 protected function evalSemanticContextOne( 1575 SemanticContext $pred, 1576 ParserRuleContext $parserCallStack, 1577 int $alt, 1578 bool $fullCtx 1579 ) : bool { 1580 return $pred->eval($this->parser, $parserCallStack); 1581 } 1582 1583 /** 1584 * TODO: If we are doing predicates, there is no point in pursuing 1585 * closure operations if we reach a DFA state that uniquely predicts 1586 * alternative. We will not be caching that DFA state and it is a 1587 * waste to pursue the closure. Might have to advance when we do 1588 * ambig detection thought :( 1589 */ 1590 protected function closure( 1591 ATNConfig $config, 1592 ATNConfigSet $configs, 1593 Set $closureBusy, 1594 bool $collectPredicates, 1595 bool $fullCtx, 1596 bool $treatEofAsEpsilon 1597 ) : void { 1598 $initialDepth = 0; 1599 1600 $this->closureCheckingStopState( 1601 $config, 1602 $configs, 1603 $closureBusy, 1604 $collectPredicates, 1605 $fullCtx, 1606 $initialDepth, 1607 $treatEofAsEpsilon 1608 ); 1609 1610 if ($fullCtx && $configs->dipsIntoOuterContext) { 1611 throw new \RuntimeException('Error.'); 1612 } 1613 } 1614 1615 protected function closureCheckingStopState( 1616 ATNConfig $config, 1617 ATNConfigSet $configs, 1618 Set $closureBusy, 1619 bool $collectPredicates, 1620 bool $fullCtx, 1621 int $depth, 1622 bool $treatEofAsEpsilon 1623 ) : void { 1624 if (self::$debug || self::$debug_closure) { 1625 $this->log[] = \sprintf('closure(%s)', $config->toString(true)); 1626 1627 if ($config->reachesIntoOuterContext > 50) { 1628 throw new \RuntimeException('problem'); 1629 } 1630 } 1631 1632 if ($config->state instanceof RuleStopState) { 1633 // We hit rule end. If we have context info, use it run thru all possible stack tops in ctx 1634 1635 if ($config->context !== null && !$config->context->isEmpty()) { 1636 for ($i =0; $i < $config->context->getLength(); $i++) { 1637 if ($config->context->getReturnState($i) === PredictionContext::EMPTY_RETURN_STATE) { 1638 if ($fullCtx) { 1639 $configs->add( 1640 new ATNConfig($config, $config->state, PredictionContext::empty(), null, null), 1641 $this->mergeCache 1642 ); 1643 } else { 1644 // we have no context info, just chase follow links (if greedy) 1645 if (self::$debug) { 1646 $this->log[] = \sprintf( 1647 'FALLING off rule %s', 1648 $this->getRuleName($config->state->ruleIndex) 1649 ); 1650 } 1651 1652 $this->closure_( 1653 $config, 1654 $configs, 1655 $closureBusy, 1656 $collectPredicates, 1657 $fullCtx, 1658 $depth, 1659 $treatEofAsEpsilon 1660 ); 1661 } 1662 1663 continue; 1664 } 1665 1666 $returnState = $this->atn->states[$config->context->getReturnState($i)]; 1667 $newContext = $config->context->getParent($i);// "pop" return state 1668 1669 $c = new ATNConfig(null, $returnState, $newContext, $config->semanticContext, $config->alt); 1670 1671 // While we have context to pop back from, we may have 1672 // gotten that context AFTER having falling off a rule. 1673 // Make sure we track that we are now out of context. 1674 // 1675 // This assignment also propagates the 1676 // isPrecedenceFilterSuppressed() value to the new 1677 // configuration. 1678 $c->reachesIntoOuterContext = $config->reachesIntoOuterContext; 1679 1680 $this->closureCheckingStopState( 1681 $c, 1682 $configs, 1683 $closureBusy, 1684 $collectPredicates, 1685 $fullCtx, 1686 $depth - 1, 1687 $treatEofAsEpsilon 1688 ); 1689 } 1690 1691 return; 1692 } elseif ($fullCtx) { 1693 // Reached end of start rule 1694 $configs->add($config, $this->mergeCache); 1695 1696 return; 1697 } else { 1698 // Else if we have no context info, just chase follow links (if greedy) 1699 if (self::$debug) { 1700 $this->log[] = \sprintf('FALLING off rule %s.', $this->getRuleName($config->state->ruleIndex)); 1701 } 1702 } 1703 } 1704 1705 $this->closure_($config, $configs, $closureBusy, $collectPredicates, $fullCtx, $depth, $treatEofAsEpsilon); 1706 } 1707 1708 /** 1709 * Do the actual work of walking epsilon edges. 1710 */ 1711 protected function closure_( 1712 ATNConfig $config, 1713 ATNConfigSet $configs, 1714 Set $closureBusy, 1715 bool $collectPredicates, 1716 bool $fullCtx, 1717 int $depth, 1718 bool $treatEofAsEpsilon 1719 ) : void { 1720 $p = $config->state; 1721 1722 // optimization 1723 if (!$p->onlyHasEpsilonTransitions()) { 1724 // make sure to not return here, because EOF transitions can act as 1725 // both epsilon transitions and non-epsilon transitions. 1726 1727 $configs->add($config, $this->mergeCache); 1728 } 1729 1730 foreach ($p->getTransitions() as $i => $t) { 1731 if ($i === 0 && $this->canDropLoopEntryEdgeInLeftRecursiveRule($config)) { 1732 continue; 1733 } 1734 1735 $continueCollecting = $collectPredicates && !$t instanceof ActionTransition; 1736 $c = $this->getEpsilonTarget($config, $t, $continueCollecting, $depth === 0, $fullCtx, $treatEofAsEpsilon); 1737 1738 if ($c !== null) { 1739 $newDepth = $depth; 1740 1741 if ($config->state instanceof RuleStopState) { 1742 if ($fullCtx) { 1743 throw new \RuntimeException('Error.'); 1744 } 1745 1746 // Target fell off end of rule; mark resulting c as having dipped into outer context 1747 // We can't get here if incoming config was rule stop and we had context 1748 // track how far we dip into outer context. Might 1749 // come in handy and we avoid evaluating context dependent 1750 // preds if this is > 0. 1751 1752 if ($this->dfa && $this->dfa->isPrecedenceDfa()) { 1753 if ($t instanceof EpsilonTransition 1754 && $this->dfa->atnStartState !== null 1755 && $t->getOutermostPrecedenceReturn() === $this->dfa->atnStartState->ruleIndex) { 1756 $c->setPrecedenceFilterSuppressed(true); 1757 } 1758 } 1759 1760 $c->reachesIntoOuterContext++; 1761 1762 if (!$closureBusy->add($c)) { 1763 // avoid infinite recursion for right-recursive rules 1764 1765 continue; 1766 } 1767 1768 // TODO: can remove? only care when we add to set per middle of this method 1769 $configs->dipsIntoOuterContext = true; 1770 $newDepth--; 1771 1772 if (self::$debug) { 1773 $this->log[] = \sprintf('dips into outer ctx: %s', $c); 1774 } 1775 } else { 1776 if (!$t->isEpsilon() && !$closureBusy->add($c)) { 1777 // avoid infinite recursion for EOF* and EOF+ 1778 1779 continue; 1780 } 1781 1782 if ($t instanceof RuleTransition) { 1783 // latch when newDepth goes negative - once we step out of the entry context we can't return 1784 1785 if ($newDepth >= 0) { 1786 $newDepth++; 1787 } 1788 } 1789 } 1790 1791 $this->closureCheckingStopState( 1792 $c, 1793 $configs, 1794 $closureBusy, 1795 $continueCollecting, 1796 $fullCtx, 1797 $newDepth, 1798 $treatEofAsEpsilon 1799 ); 1800 } 1801 } 1802 } 1803 1804 /** 1805 * Implements first-edge (loop entry) elimination as an optimization 1806 * during closure operations. See antlr/antlr4#1398. 1807 * 1808 * The optimization is to avoid adding the loop entry config when 1809 * the exit path can only lead back to the same 1810 * StarLoopEntryState after popping context at the rule end state 1811 * (traversing only epsilon edges, so we're still in closure, in 1812 * this same rule). 1813 * 1814 * We need to detect any state that can reach loop entry on 1815 * epsilon w/o exiting rule. We don't have to look at FOLLOW 1816 * links, just ensure that all stack tops for config refer to key 1817 * states in LR rule. 1818 * 1819 * To verify we are in the right situation we must first check 1820 * closure is at a StarLoopEntryState generated during LR removal. 1821 * Then we check that each stack top of context is a return state 1822 * from one of these cases: 1823 * 1824 * 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state 1825 * 2. expr op expr. The return state is the block end of internal block of (...)* 1826 * 3. 'between' expr 'and' expr. The return state of 2nd expr reference. 1827 * That state points at block end of internal block of (...)*. 1828 * 4. expr '?' expr ':' expr. The return state points at block end, 1829 * which points at loop entry state. 1830 * 1831 * If any is true for each stack top, then closure does not add a 1832 * config to the current config set for edge[0], the loop entry branch. 1833 * 1834 * Conditions fail if any context for the current config is: 1835 * 1836 * a. empty (we'd fall out of expr to do a global FOLLOW which could 1837 * even be to some weird spot in expr) or, 1838 * b. lies outside of expr or, 1839 * c. lies within expr but at a state not the BlockEndState 1840 * generated during LR removal 1841 * 1842 * Do we need to evaluate predicates ever in closure for this case? 1843 * 1844 * No. Predicates, including precedence predicates, are only 1845 * evaluated when computing a DFA start state. I.e., only before 1846 * the lookahead (but not parser) consumes a token. 1847 * 1848 * There are no epsilon edges allowed in LR rule alt blocks or in 1849 * the "primary" part (ID here). If closure is in 1850 * StarLoopEntryState any lookahead operation will have consumed a 1851 * token as there are no epsilon-paths that lead to 1852 * StarLoopEntryState. We do not have to evaluate predicates 1853 * therefore if we are in the generated StarLoopEntryState of a LR 1854 * rule. Note that when making a prediction starting at that 1855 * decision point, decision d=2, compute-start-state performs 1856 * closure starting at edges[0], edges[1] emanating from 1857 * StarLoopEntryState. That means it is not performing closure on 1858 * StarLoopEntryState during compute-start-state. 1859 * 1860 * How do we know this always gives same prediction answer? 1861 * 1862 * Without predicates, loop entry and exit paths are ambiguous 1863 * upon remaining input +b (in, say, a+b). Either paths lead to 1864 * valid parses. Closure can lead to consuming + immediately or by 1865 * falling out of this call to expr back into expr and loop back 1866 * again to StarLoopEntryState to match +b. In this special case, 1867 * we choose the more efficient path, which is to take the bypass 1868 * path. 1869 * 1870 * The lookahead language has not changed because closure chooses 1871 * one path over the other. Both paths lead to consuming the same 1872 * remaining input during a lookahead operation. If the next token 1873 * is an operator, lookahead will enter the choice block with 1874 * operators. If it is not, lookahead will exit expr. Same as if 1875 * closure had chosen to enter the choice block immediately. 1876 * 1877 * Closure is examining one config (some loopentrystate, some alt, 1878 * context) which means it is considering exactly one alt. Closure 1879 * always copies the same alt to any derived configs. 1880 * 1881 * How do we know this optimization doesn't mess up precedence in 1882 * our parse trees? 1883 * 1884 * Looking through expr from left edge of stat only has to confirm 1885 * that an input, say, a+b+c; begins with any valid interpretation 1886 * of an expression. The precedence actually doesn't matter when 1887 * making a decision in stat seeing through expr. It is only when 1888 * parsing rule expr that we must use the precedence to get the 1889 * right interpretation and, hence, parse tree. 1890 * 1891 * @since 4.6 1892 */ 1893 protected function canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig $config) : bool 1894 { 1895 $p = $config->state; 1896 1897 /* First check to see if we are in StarLoopEntryState generated during 1898 * left-recursion elimination. For efficiency, also check if 1899 * the context has an empty stack case. If so, it would mean 1900 * global FOLLOW so we can't perform optimization 1901 * Are we the special loop entry/exit state? or SLL wildcard 1902 */ 1903 1904 if ($config->context === null) { 1905 throw new \RuntimeException('Prediction context cannot be null.'); 1906 } 1907 1908 if ($p->getStateType() !== ATNState::STAR_LOOP_ENTRY 1909 || ($p instanceof StarLoopEntryState && !$p->isPrecedenceDecision) 1910 || $config->context->isEmpty() 1911 || $config->context->hasEmptyPath()) { 1912 return false; 1913 } 1914 1915 // Require all return states to return back to the same rule that p is in. 1916 $numCtxs = $config->context->getLength(); 1917 1918 for ($i = 0; $i < $numCtxs; $i++) { 1919 // For each stack context 1920 $returnState = $this->atn->states[$config->context->getReturnState($i)]; 1921 1922 if ($returnState->ruleIndex !== $p->ruleIndex) { 1923 return false; 1924 } 1925 } 1926 1927 $decisionStartState = $p->getTransition(0)->target; 1928 1929 if (!$decisionStartState instanceof BlockStartState || $decisionStartState->endState === null) { 1930 throw new \RuntimeException('Unexpected transition type.'); 1931 } 1932 1933 $blockEndStateNum = $decisionStartState->endState->stateNumber; 1934 $blockEndState = $this->atn->states[$blockEndStateNum]; 1935 1936 if (!$blockEndState instanceof BlockEndState) { 1937 throw new \RuntimeException('Unexpected transition type.'); 1938 } 1939 1940 // Verify that the top of each stack context leads to loop entry/exit 1941 // state through epsilon edges and w/o leaving rule. 1942 for ($i = 0; $i < $numCtxs; $i++) { 1943 // For each stack context 1944 1945 $returnStateNumber = $config->context->getReturnState($i); 1946 $returnState = $this->atn->states[$returnStateNumber]; 1947 1948 // All states must have single outgoing epsilon edge 1949 if ($returnState->getNumberOfTransitions() !== 1 || !$returnState->getTransition(0)->isEpsilon()) { 1950 return false; 1951 } 1952 1953 // Look for prefix op case like 'not expr', (' type ')' expr 1954 $returnStateTarget = $returnState->getTransition(0)->target; 1955 1956 if ($returnState->getStateType() === ATNState::BLOCK_END && $returnStateTarget->equals($p)) { 1957 continue; 1958 } 1959 1960 // Look for 'expr op expr' or case where expr's return state is block end 1961 // of (...)* internal block; the block end points to loop back 1962 // which points to p but we don't need to check that 1963 if ($returnState->equals($blockEndState)) { 1964 continue; 1965 } 1966 1967 // Look for ternary expr ? expr : expr. The return state points at block end, 1968 // which points at loop entry state 1969 if ($returnStateTarget->equals($blockEndState)) { 1970 continue; 1971 } 1972 1973 // Look for complex prefix 'between expr and expr' case where 2nd expr's 1974 // return state points at block end state of (...)* internal block 1975 if ($returnStateTarget->getStateType() === ATNState::BLOCK_END 1976 && $returnStateTarget->getNumberOfTransitions() === 1 1977 && $returnStateTarget->getTransition(0)->isEpsilon() 1978 && $returnStateTarget->getTransition(0)->target->equals($p)) { 1979 continue; 1980 } 1981 1982 // anything else ain't conforming 1983 return false; 1984 } 1985 1986 return true; 1987 } 1988 1989 public function getRuleName(int $index) : string 1990 { 1991 if ($this->parser !== null && $index >= 0) { 1992 return $this->parser->getRuleNames()[$index]; 1993 } 1994 1995 return '<rule $index>'; 1996 } 1997 1998 protected function getEpsilonTarget( 1999 ATNConfig $config, 2000 Transition $t, 2001 bool $collectPredicates, 2002 bool $inContext, 2003 bool $fullCtx, 2004 bool $treatEofAsEpsilon 2005 ) : ?ATNConfig { 2006 switch ($t->getSerializationType()) { 2007 case Transition::RULE: 2008 if (!$t instanceof RuleTransition) { 2009 throw new \RuntimeException('Unexpected transition type.'); 2010 } 2011 2012 return $this->ruleTransition($config, $t); 2013 2014 case Transition::PRECEDENCE: 2015 if (!$t instanceof PrecedencePredicateTransition) { 2016 throw new \RuntimeException('Unexpected transition type.'); 2017 } 2018 2019 return $this->precedenceTransition($config, $t, $collectPredicates, $inContext, $fullCtx); 2020 2021 case Transition::PREDICATE: 2022 if (!$t instanceof PredicateTransition) { 2023 throw new \RuntimeException('Unexpected transition type.'); 2024 } 2025 2026 return $this->predTransition($config, $t, $collectPredicates, $inContext, $fullCtx); 2027 2028 case Transition::ACTION: 2029 if (!$t instanceof ActionTransition) { 2030 throw new \RuntimeException('Unexpected transition type.'); 2031 } 2032 2033 return $this->actionTransition($config, $t); 2034 2035 case Transition::EPSILON: 2036 return new ATNConfig($config, $t->target); 2037 2038 case Transition::ATOM: 2039 case Transition::RANGE: 2040 case Transition::SET: 2041 // EOF transitions act like epsilon transitions after the first EOF transition is traversed 2042 2043 if ($treatEofAsEpsilon) { 2044 if ($t->matches(Token::EOF, 0, 1)) { 2045 return new ATNConfig($config, $t->target); 2046 } 2047 } 2048 2049 return null; 2050 2051 default: 2052 return null; 2053 } 2054 } 2055 2056 protected function actionTransition(ATNConfig $config, ActionTransition $t) : ATNConfig 2057 { 2058 if (self::$debug) { 2059 $this->log[] = \sprintf('ACTION edge %d:%d', $t->ruleIndex, $t->actionIndex); 2060 } 2061 2062 return new ATNConfig($config, $t->target); 2063 } 2064 2065 public function precedenceTransition( 2066 ATNConfig $config, 2067 PrecedencePredicateTransition $pt, 2068 bool $collectPredicates, 2069 bool $inContext, 2070 bool $fullCtx 2071 ) : ?ATNConfig { 2072 if (self::$debug) { 2073 $this->log[] = \sprintf( 2074 'PRED (collectPredicates=%s) %d>=_p, ctx dependent=true', 2075 $collectPredicates, 2076 $pt->precedence 2077 ); 2078 2079 if ($this->parser !== null) { 2080 $this->log[] = \sprintf( 2081 'context surrounding pred is [%s]', 2082 \implode(', ', $this->parser->getRuleInvocationStack()) 2083 ); 2084 } 2085 } 2086 2087 $c = null; 2088 2089 if ($collectPredicates && $inContext) { 2090 if ($fullCtx) { 2091 /* In full context mode, we can evaluate predicates on-the-fly 2092 * during closure, which dramatically reduces the size of 2093 * the config sets. It also obviates the need to test predicates 2094 * later during conflict resolution. 2095 */ 2096 2097 $currentPosition = $this->input->getIndex(); 2098 2099 $this->input->seek($this->startIndex); 2100 2101 $predSucceeds = $this->outerContext !== null ? 2102 $pt->getPredicate()->eval($this->parser, $this->outerContext) : 2103 false; 2104 2105 $this->input->seek($currentPosition); 2106 2107 if ($predSucceeds) { 2108 $c = new ATNConfig($config, $pt->target);// no pred context 2109 } 2110 } else { 2111 $newSemCtx = SemanticContext::andContext($config->semanticContext, $pt->getPredicate()); 2112 $c = new ATNConfig($config, $pt->target, null, $newSemCtx); 2113 } 2114 } else { 2115 $c = new ATNConfig($config, $pt->target); 2116 } if (self::$debug) { 2117 $this->log[] = \sprintf('Config from pred transition=%s', (string) $c); 2118 } 2119 2120 return $c; 2121 } 2122 2123 protected function predTransition( 2124 ATNConfig $config, 2125 PredicateTransition $pt, 2126 bool $collectPredicates, 2127 bool $inContext, 2128 bool $fullCtx 2129 ) : ?ATNConfig { 2130 if (self::$debug) { 2131 $this->log[] = \sprintf( 2132 'PRED (collectPredicates=%s) %d:%d, ctx dependent=%s', 2133 $collectPredicates, 2134 $pt->ruleIndex, 2135 $pt->predIndex, 2136 $pt->isCtxDependent 2137 ); 2138 2139 if ($this->parser !== null) { 2140 $this->log[] = \sprintf( 2141 'Context surrounding pred is [%s]', 2142 \implode(', ', $this->parser->getRuleInvocationStack()) 2143 ); 2144 } 2145 } 2146 2147 $c = null; 2148 2149 if ($collectPredicates && (!$pt->isCtxDependent || $inContext)) { 2150 if ($fullCtx) { 2151 // In full context mode, we can evaluate predicates on-the-fly 2152 // during closure, which dramatically reduces the size of 2153 // the config sets. It also obviates the need to test predicates 2154 // later during conflict resolution. 2155 2156 $currentPosition = $this->input->getIndex(); 2157 2158 $this->input->seek($this->startIndex); 2159 2160 $predSucceeds = $this->outerContext !== null ? 2161 $pt->getPredicate()->eval($this->parser, $this->outerContext) : 2162 false; 2163 2164 $this->input->seek($currentPosition); 2165 2166 if ($predSucceeds) { 2167 $c = new ATNConfig($config, $pt->target);// no pred context 2168 } 2169 } else { 2170 $newSemCtx = SemanticContext::andContext($config->semanticContext, $pt->getPredicate()); 2171 $c = new ATNConfig($config, $pt->target, null, $newSemCtx); 2172 } 2173 } else { 2174 $c = new ATNConfig($config, $pt->target); 2175 } 2176 2177 if (self::$debug) { 2178 $this->log[] = \sprintf('Config from pred transition=%s', (string) $c); 2179 } 2180 2181 return $c; 2182 } 2183 2184 protected function ruleTransition(ATNConfig $config, RuleTransition $t) : ATNConfig 2185 { 2186 if (self::$debug) { 2187 $this->log[] = \sprintf( 2188 'CALL rule %s, ctx=%s', 2189 $this->getRuleName($t->target->ruleIndex), 2190 $config->context 2191 ); 2192 } 2193 2194 $returnState = $t->followState; 2195 $newContext = SingletonPredictionContext::create($config->context, $returnState->stateNumber); 2196 2197 return new ATNConfig($config, $t->target, $newContext); 2198 } 2199 2200 /** 2201 * Gets a {@see BitSet} containing the alternatives in `configs` 2202 * which are part of one or more conflicting alternative subsets. 2203 * 2204 * @param ATNConfigSet $configs The {@see ATNConfigSet} to analyze. 2205 * 2206 * @return BitSet The alternatives in `configs` which are part of one or 2207 * more conflicting alternative subsets. If `configs` does 2208 * not contain any conflicting subsets, this method returns 2209 * an empty {@see BitSet}. 2210 */ 2211 protected function getConflictingAlts(ATNConfigSet $configs) : BitSet 2212 { 2213 $altsets = PredictionMode::getConflictingAltSubsets($configs); 2214 2215 return PredictionMode::getAlts($altsets); 2216 } 2217 2218 /** 2219 Sam pointed out a problem with the previous definition, v3, of 2220 ambiguous states. If we have another state associated with conflicting 2221 alternatives, we should keep going. For example, the following grammar 2222 2223 s : (ID | ID ID?) ';' ; 2224 2225 When the ATN simulation reaches the state before ';', it has a DFA 2226 state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally 2227 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node 2228 because alternative to has another way to continue, via [6|2|[]]. 2229 The key is that we have a single state that has config's only associated 2230 with a single alternative, 2, and crucially the state transitions 2231 among the configurations are all non-epsilon transitions. That means 2232 we don't consider any conflicts that include alternative 2. So, we 2233 ignore the conflict between alts 1 and 2. We ignore a set of 2234 conflicting alts when there is an intersection with an alternative 2235 associated with a single alt state in the state→config-list map. 2236 2237 It's also the case that we might have two conflicting configurations but 2238 also a 3rd nonconflicting configuration for a different alternative: 2239 [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: 2240 2241 a : A | A | A B ; 2242 2243 After matching input A, we reach the stop state for rule A, state 1. 2244 State 8 is the state right before B. Clearly alternatives 1 and 2 2245 conflict and no amount of further lookahead will separate the two. 2246 However, alternative 3 will be able to continue and so we do not 2247 stop working on this state. In the previous example, we're concerned 2248 with states associated with the conflicting alternatives. Here alt 2249 3 is not associated with the conflicting configs, but since we can continue 2250 looking for input reasonably, I don't declare the state done. We 2251 ignore a set of conflicting alts when we have an alternative 2252 that we still need to pursue. 2253 */ 2254 protected function getConflictingAltsOrUniqueAlt(ATNConfigSet $configs) : ?BitSet 2255 { 2256 if ($configs->uniqueAlt !== ATN::INVALID_ALT_NUMBER) { 2257 $conflictingAlts = new BitSet(); 2258 2259 $conflictingAlts->add($configs->uniqueAlt); 2260 2261 return $conflictingAlts; 2262 } 2263 2264 return $configs->getConflictingAlts(); 2265 } 2266 2267 public function getTokenName(int $t) : string 2268 { 2269 if ($t === Token::EOF) { 2270 return 'EOF'; 2271 } 2272 2273 $vocabulary = $this->parser !== null ? $this->parser->getVocabulary() : VocabularyImpl::emptyVocabulary(); 2274 $displayName = $vocabulary->getDisplayName($t); 2275 2276 if ($displayName === (string) $t) { 2277 return $displayName; 2278 } 2279 2280 return \sprintf('%s<%d>', $displayName, $t); 2281 } 2282 2283 public function getLookaheadName(TokenStream $input) : string 2284 { 2285 return $this->getTokenName($input->LA(1)); 2286 } 2287 2288 protected function noViableAlt( 2289 TokenStream $input, 2290 $outerContext, 2291 ?ATNConfigSet $configs, 2292 int $startIndex 2293 ) : NoViableAltException { 2294 return new NoViableAltException( 2295 $this->parser, 2296 $input, 2297 $input->get($startIndex), 2298 $input->LT(1), 2299 $configs, 2300 $outerContext 2301 ); 2302 } 2303 2304 protected static function getUniqueAlt(ATNConfigSet $configs) : int 2305 { 2306 $alt = ATN::INVALID_ALT_NUMBER; 2307 2308 foreach ($configs->elements() as $c) { 2309 if ($alt === ATN::INVALID_ALT_NUMBER) { 2310 $alt = $c->alt; // found first alt 2311 } elseif ($c->alt !== $alt) { 2312 return ATN::INVALID_ALT_NUMBER; 2313 } 2314 } 2315 2316 return $alt; 2317 } 2318 2319 /** 2320 * Add an edge to the DFA, if possible. This method calls 2321 * {@see ParserATNSimulator::addDFAState()} to ensure the `to` state is 2322 * present in the DFA. If `from` is `null`, or if `t` is outside the 2323 * range of edges that can be represented in the DFA tables, this method 2324 * returns without adding the edge to the DFA. 2325 * 2326 * If `to` is `null`, this method returns `null`. Otherwise, this method 2327 * returns the {@see DFAState} returned by calling 2328 * {@see ParserATNSimulator::addDFAState()} for the `to` state. 2329 * 2330 * @param DFA $dfa The DFA 2331 * @param DFAState|null $from The source state for the edge 2332 * @param int $t The input symbol 2333 * @param DFAState|null $to The target state for the edge 2334 * 2335 * @return DFAState If `to` is `null` this method returns `null`, 2336 * otherwise this method returns the result of calling 2337 * {@see ParserATNSimulator::addDFAState()} on `to`. 2338 */ 2339 protected function addDFAEdge(DFA $dfa, ?DFAState $from, int $t, ?DFAState $to) : ?DFAState 2340 { 2341 if (self::$debug) { 2342 $this->log[] = \sprintf('EDGE %s -> %s upon %s', (string) $from, (string) $to, $this->getTokenName($t)); 2343 } 2344 2345 if ($to === null) { 2346 return null; 2347 } 2348 2349 $to = $this->addDFAState($dfa, $to);// used existing if possible not incoming 2350 2351 if ($from === null || $t < -1 || $t > $this->atn->maxTokenType) { 2352 return $to; 2353 } 2354 2355 if ($from->edges === null) { 2356 $from->edges = new \SplFixedArray($this->atn->maxTokenType + 1 + 1); 2357 } 2358 2359 $from->edges[$t + 1] = $to; 2360 2361 if (self::$debug) { 2362 $this->log[] = 'DFA =' . \PHP_EOL . $dfa->toString($this->parser->getVocabulary()); 2363 } 2364 2365 return $to; 2366 } 2367 2368 /** 2369 * Add state `D` to the DFA if it is not already present, and return 2370 * the actual instance stored in the DFA. If a state equivalent to `D` 2371 * is already in the DFA, the existing state is returned. Otherwise this 2372 * method returns `D` after adding it to the DFA. 2373 * 2374 * If `D` is {@see ParserATNSimulator::error()}, this method returns 2375 * {@see ParserATNSimulator::error()} and does not change the DFA. 2376 * 2377 * @param DFA $dfa The dfa 2378 * @param DFAState $D The DFA state to add 2379 * 2380 * @return DFAState The state stored in the DFA. This will be either 2381 * the existing state if `D` is already in the DFA, or `D` 2382 * itself if the state was not already present. 2383 * 2384 * @throws \InvalidArgumentException 2385 */ 2386 protected function addDFAState(DFA $dfa, DFAState $D) : DFAState 2387 { 2388 if ($D === self::error()) { 2389 return $D; 2390 } 2391 2392 $existing = $dfa->states->get($D); 2393 2394 if ($existing !== null && $existing instanceof DFAState) { 2395 return $existing; 2396 } 2397 2398 $D->stateNumber = $dfa->states->count(); 2399 2400 if (!$D->configs->isReadOnly()) { 2401 $D->configs->optimizeConfigs($this); 2402 $D->configs->setReadonly(true); 2403 } 2404 2405 $dfa->states->add($D); 2406 2407 if (self::$debug) { 2408 $this->log[] = \sprintf('Adding new DFA state: %s', (string) $D); 2409 } 2410 2411 return $D; 2412 } 2413 2414 protected function reportAttemptingFullContext( 2415 DFA $dfa, 2416 ?BitSet $conflictingAlts, 2417 ATNConfigSet $configs, 2418 int $startIndex, 2419 int $stopIndex 2420 ) : void { 2421 if (self::$debug || self::$retry_debug) { 2422 $interval = new Interval($startIndex, $stopIndex); 2423 $tokenStream = $this->parser->getTokenStream(); 2424 2425 $this->log[] = \sprintf( 2426 'reportAttemptingFullContext decision = %d:%s, input = %s', 2427 $dfa->decision, 2428 (string) $configs, 2429 $tokenStream === null ? '' : $tokenStream->getTextByInterval($interval) 2430 ); 2431 } 2432 2433 if ($this->parser !== null) { 2434 $this->parser->getErrorListenerDispatch()->reportAttemptingFullContext( 2435 $this->parser, 2436 $dfa, 2437 $startIndex, 2438 $stopIndex, 2439 $conflictingAlts, 2440 $configs 2441 ); 2442 } 2443 } 2444 2445 protected function reportContextSensitivity( 2446 DFA $dfa, 2447 int $prediction, 2448 ATNConfigSet $configs, 2449 int $startIndex, 2450 int $stopIndex 2451 ) : void { 2452 if (self::$debug || self::$retry_debug) { 2453 $interval = new Interval($startIndex, $stopIndex); 2454 $tokenStream = $this->parser->getTokenStream(); 2455 2456 $this->log[] = \sprintf( 2457 'reportContextSensitivity decision = %d:%s, input = %s', 2458 $dfa->decision, 2459 (string) $configs, 2460 $tokenStream === null ? '' : $tokenStream->getTextByInterval($interval) 2461 ); 2462 } 2463 2464 if ($this->parser !== null) { 2465 $this->parser->getErrorListenerDispatch()->reportContextSensitivity( 2466 $this->parser, 2467 $dfa, 2468 $startIndex, 2469 $stopIndex, 2470 $prediction, 2471 $configs 2472 ); 2473 } 2474 } 2475 2476 /** 2477 * If context sensitive parsing, we know it's ambiguity not conflict. 2478 */ 2479 protected function reportAmbiguity( 2480 DFA $dfa, 2481 DFAState $D, 2482 int $startIndex, 2483 int $stopIndex, 2484 bool $exact, 2485 ?BitSet $ambigAlts, 2486 ATNConfigSet $configs 2487 ) : void { 2488 if (self::$debug || self::$retry_debug) { 2489 $interval = new Interval($startIndex, $stopIndex); 2490 $tokenStream = $this->parser->getTokenStream(); 2491 2492 $this->log[] = \sprintf( 2493 'reportAmbiguity %s:%s, input = %s', 2494 (string) $ambigAlts, 2495 (string) $configs, 2496 $tokenStream === null ? '' : $tokenStream->getTextByInterval($interval) 2497 ); 2498 } 2499 2500 if ($this->parser !== null) { 2501 $this->parser->getErrorListenerDispatch()->reportAmbiguity( 2502 $this->parser, 2503 $dfa, 2504 $startIndex, 2505 $stopIndex, 2506 $exact, 2507 $ambigAlts, 2508 $configs 2509 ); 2510 } 2511 } 2512 2513 public function setPredictionMode(int $mode) : void 2514 { 2515 $this->mode = $mode; 2516 } 2517 2518 public function getPredictionMode() : int 2519 { 2520 return $this->mode; 2521 } 2522 2523 public function getParser() : Parser 2524 { 2525 return $this->parser; 2526 } 2527} 2528